newspaper_works 0.1.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. checksums.yaml +5 -5
  2. data/.coveralls.yml +2 -0
  3. data/.gitignore +4 -0
  4. data/.travis.yml +2 -2
  5. data/README.md +14 -13
  6. data/app/services/newspaper_works/jp2_derivative_service.rb +1 -3
  7. data/app/services/newspaper_works/newspaper_page_derivative_service.rb +37 -15
  8. data/app/services/newspaper_works/pdf_derivative_service.rb +4 -7
  9. data/app/services/newspaper_works/tiff_derivative_service.rb +5 -9
  10. data/app/views/newspaper_works/base/_attribute_rows.html.erb +72 -24
  11. data/config/locales/newspaper_article.de.yml +1 -1
  12. data/config/locales/newspaper_article.en.yml +1 -1
  13. data/config/locales/newspaper_article.es.yml +1 -1
  14. data/config/locales/newspaper_article.fr.yml +1 -1
  15. data/config/locales/newspaper_article.it.yml +1 -1
  16. data/config/locales/newspaper_article.pt-BR.yml +1 -1
  17. data/config/locales/newspaper_article.zh.yml +1 -1
  18. data/config/locales/newspaper_container.de.yml +1 -1
  19. data/config/locales/newspaper_container.en.yml +1 -1
  20. data/config/locales/newspaper_container.es.yml +1 -1
  21. data/config/locales/newspaper_container.fr.yml +1 -1
  22. data/config/locales/newspaper_container.it.yml +1 -1
  23. data/config/locales/newspaper_container.pt-BR.yml +1 -1
  24. data/config/locales/newspaper_container.zh.yml +1 -1
  25. data/config/locales/newspaper_issue.de.yml +1 -1
  26. data/config/locales/newspaper_issue.en.yml +1 -1
  27. data/config/locales/newspaper_issue.es.yml +1 -1
  28. data/config/locales/newspaper_issue.fr.yml +1 -1
  29. data/config/locales/newspaper_issue.it.yml +2 -2
  30. data/config/locales/newspaper_issue.pt-BR.yml +2 -2
  31. data/config/locales/newspaper_issue.zh.yml +2 -2
  32. data/config/locales/newspaper_page.de.yml +1 -1
  33. data/config/locales/newspaper_page.en.yml +1 -1
  34. data/config/locales/newspaper_page.es.yml +1 -1
  35. data/config/locales/newspaper_page.fr.yml +1 -1
  36. data/config/locales/newspaper_page.it.yml +1 -1
  37. data/config/locales/newspaper_page.pt-BR.yml +1 -1
  38. data/config/locales/newspaper_page.zh.yml +1 -1
  39. data/config/locales/newspaper_title.de.yml +1 -1
  40. data/config/locales/newspaper_title.en.yml +1 -1
  41. data/config/locales/newspaper_title.es.yml +1 -1
  42. data/config/locales/newspaper_title.fr.yml +1 -1
  43. data/config/locales/newspaper_title.it.yml +1 -1
  44. data/config/locales/newspaper_title.pt-BR.yml +1 -1
  45. data/config/locales/newspaper_title.zh.yml +1 -1
  46. data/config/locales/newspaper_works.de.yml +98 -0
  47. data/config/locales/newspaper_works.en.yml +67 -0
  48. data/config/locales/newspaper_works.es.yml +96 -0
  49. data/config/locales/newspaper_works.fr.yml +97 -0
  50. data/config/locales/newspaper_works.it.yml +90 -0
  51. data/config/locales/newspaper_works.pt-BR.yml +96 -0
  52. data/config/locales/newspaper_works.zh.yml +90 -0
  53. data/config/vendor/fits.xml +55 -0
  54. data/config/vendor/imagemagick-6-policy.xml +39 -39
  55. data/lib/newspaper_works.rb +2 -0
  56. data/lib/newspaper_works/image_tool.rb +119 -0
  57. data/lib/newspaper_works/jp2_image_metadata.rb +81 -0
  58. data/lib/newspaper_works/text_extraction.rb +1 -0
  59. data/lib/newspaper_works/text_extraction/hocr_reader.rb +173 -0
  60. data/lib/newspaper_works/text_extraction/page_ocr.rb +37 -51
  61. data/lib/newspaper_works/text_extraction/render_alto.rb +4 -4
  62. data/lib/newspaper_works/version.rb +1 -1
  63. data/newspaper_works.gemspec +2 -3
  64. data/spec/features/search_results_thumbnail_highlights_spec.rb +1 -1
  65. data/spec/fixtures/files/ocr_mono_text_hocr.html +78 -0
  66. data/spec/lib/newspaper_works/image_tool_spec.rb +109 -0
  67. data/spec/lib/newspaper_works/ingest/ingest_shared.rb +3 -3
  68. data/spec/lib/newspaper_works/ingest/newspaper_page_ingest_spec.rb +2 -2
  69. data/spec/lib/newspaper_works/jp2_image_metadata_spec.rb +37 -0
  70. data/spec/lib/newspaper_works/text_extraction/hocr_reader_spec.rb +45 -0
  71. data/spec/lib/newspaper_works/text_extraction/page_ocr_spec.rb +3 -3
  72. data/spec/lib/newspaper_works/text_extraction/render_alto_spec.rb +14 -14
  73. data/spec/services/newspaper_works/jp2_derivative_service_spec.rb +10 -13
  74. data/spec/services/newspaper_works/newspaper_page_derivative_service_spec.rb +10 -8
  75. data/spec/services/newspaper_works/pdf_derivative_service_spec.rb +11 -7
  76. data/spec/services/newspaper_works/tiff_derivative_service_spec.rb +17 -10
  77. data/spec/spec_helper.rb +19 -0
  78. metadata +21 -22
@@ -2,6 +2,58 @@
2
2
  fr:
3
3
  newspaper_works:
4
4
  attributes:
5
+ held_by:
6
+ label: 'Lieu de détention'
7
+ identifier:
8
+ label: 'Identifiant (local)'
9
+ issn:
10
+ label: 'ISSN'
11
+ lccn:
12
+ label: 'LCCN'
13
+ oclcnum:
14
+ label: 'OCLC #'
15
+ publication_date_end:
16
+ label: 'Date de publication (fin)'
17
+ publication_date_start:
18
+ label: 'Date de publication (début)'
19
+ publication_date:
20
+ label: 'Date de publication'
21
+ alternative_title:
22
+ label: 'Titre supplémentaire'
23
+ edition_name:
24
+ label: "Nom de l'édition"
25
+ edition_number:
26
+ label: "Numéro de l'édition"
27
+ volume:
28
+ label: 'Volume'
29
+ issue_number:
30
+ label: 'Numéro de série'
31
+ page_number:
32
+ label: 'Pagination'
33
+ section:
34
+ label: 'Section'
35
+ extent:
36
+ label: 'Caractéristiques physiques'
37
+ frequency:
38
+ label: 'Fréquence'
39
+ preceded_by:
40
+ label: 'Titre précédent'
41
+ succeeded_by:
42
+ label: 'Titre suivant'
43
+ genre:
44
+ label: 'Genre'
45
+ geographic_coverage:
46
+ label: 'Lieux décrits'
47
+ author:
48
+ label: 'Auteur'
49
+ photographer:
50
+ label: 'Photographe'
51
+ height:
52
+ label: 'Hauteur'
53
+ width:
54
+ label: 'Largeur'
55
+ text_direction:
56
+ label: 'Direction du texte'
5
57
  place_of_publication:
6
58
  label: 'Place de publication'
7
59
  publication_title:
@@ -32,6 +84,25 @@ fr:
32
84
  front_pages_link: 'Voir toutes les premières pages'
33
85
 
34
86
  simple_form:
87
+ hints:
88
+ defaults:
89
+ held_by: "Nom de la bibliothèque ou de l'organisation qui contient l'élément d'origine."
90
+ place_of_publication: "L'emplacement où l'élément a été publié."
91
+ alternative_title: "Un titre ou un sous-titre supplémentaire pour l'élément."
92
+ edition_name: "Le nom de l'édition de l'élément."
93
+ edition_number: "Le numéro de l'édition de l'élément."
94
+ extent: "Une description des caractéristiques physiques de l'article."
95
+ frequency: 'Fréquence de publication.'
96
+ preceded_by: 'Le titre qui a précédé cette publication.'
97
+ succeeded_by: 'Le titre qui a suivi cette publication.'
98
+ publication_date: 'Date de publication (YYYY-MM-DD).'
99
+ publication_date_start: 'Date début de publication (YYYY-MM-DD).'
100
+ publication_date_end: 'Date fin de publication (YYYY-MM-DD).'
101
+ genre: "Type d'article de journal."
102
+ geographic_coverage: "L'emplacement décrit dans cet article."
103
+ page_number: "Les informations de pagination, telles qu'elles sont écrites sur la page."
104
+ text_direction: 'La direction dans laquelle le texte est lu.'
105
+ section: 'La section du journal dans laquelle l’article apparaît.'
35
106
  labels:
36
107
  defaults:
37
108
  held_by: 'Lieu de détention'
@@ -41,11 +112,37 @@ fr:
41
112
  oclcnum: 'OCLC #'
42
113
  publication_date_end: 'Date de publication (fin)'
43
114
  publication_date_start: 'Date de publication (début)'
115
+ publication_date: 'Date de publication'
116
+ place_of_publication: 'Emplacement de publication'
117
+ alternative_title: 'Titre supplémentaire'
118
+ edition_name: "Nom de l'édition"
119
+ edition_number: "Numéro de l'édition"
120
+ volume: 'Volume'
121
+ issue_number: 'Numéro de série'
122
+ page_number: 'Pagination'
123
+ section: 'Section'
124
+ extent: "Caractéristiques physiques"
125
+ frequency: 'Fréquence'
126
+ preceded_by: 'Titre précédent'
127
+ succeeded_by: 'Titre suivant'
128
+ genre: "Genre"
129
+ geographic_coverage: "Lieux décrits"
130
+ author: 'Auteur'
131
+ photographer: 'Photographe'
132
+ height: 'Hauteur'
133
+ width: 'Largeur'
134
+ text_direction: 'Direction du texte'
135
+
44
136
 
45
137
  blacklight:
46
138
  search:
47
139
  fields:
48
140
  all_text_tsimv: 'Correspondances de mots clés'
141
+ place_of_publication_label_tesim: 'Emplacement de publication'
142
+ publication_title_ssi: 'Journal'
143
+ publication_date_dtsi: 'Date de publication'
144
+ publication_date_start_dtsi: 'Date de publication (début)'
145
+ publication_date_end_dtsi: 'Date de publication (fin)'
49
146
  results:
50
147
  snippets:
51
148
  less: '<< moins'
@@ -2,6 +2,52 @@
2
2
  it:
3
3
  newspaper_works:
4
4
  attributes:
5
+ alternative_title:
6
+ label: 'Titolo aggiuntivo'
7
+ genre:
8
+ label: 'Genere'
9
+ held_by:
10
+ label: 'Tenendo posizione'
11
+ text_direction:
12
+ label: 'Direzione testo'
13
+ page_number:
14
+ label: 'Impaginazione'
15
+ section:
16
+ label: 'Sezione'
17
+ author:
18
+ label: 'Autore'
19
+ photographer:
20
+ label: 'Fotografo'
21
+ volume:
22
+ label: 'Volume'
23
+ issue_number:
24
+ label: 'Edizione numero'
25
+ edition_name:
26
+ label: 'Edizione nome'
27
+ edition_number:
28
+ label: 'Edizione numero'
29
+ geographic_coverage:
30
+ label: 'Luoghi descritti'
31
+ extent:
32
+ label: 'Caratteristiche fisiche'
33
+ publication_date:
34
+ label: 'Data di pubblicazione'
35
+ publication_date_start:
36
+ label: 'Data di pubblicazione (inizio)'
37
+ publication_date_end:
38
+ label: 'Data di pubblicazione (fine)'
39
+ frequency:
40
+ label: 'Frequenza'
41
+ preceded_by:
42
+ label: 'Titolo precedente'
43
+ succeeded_by:
44
+ label: 'Titolo successivo'
45
+ issn:
46
+ label: 'ISSN'
47
+ lccn:
48
+ label: 'LCCN'
49
+ oclcnum:
50
+ label: 'OCLC #'
5
51
  place_of_publication:
6
52
  label: 'Luogo di pubblicazione'
7
53
  publication_title:
@@ -32,6 +78,25 @@ it:
32
78
  front_pages_link: 'Visualizza tutte le prime pagine'
33
79
 
34
80
  simple_form:
81
+ hints:
82
+ defaults:
83
+ held_by: "Il nome della libreria o dell'organizzazione che contiene l'elemento originale."
84
+ place_of_publication: "La posizione in cui l'articolo è stato pubblicato."
85
+ alternative_title: "Un titolo o sottotitolo aggiuntivo per l'elemento."
86
+ edition_name: "Il nome dell'edizione dell'articolo."
87
+ edition_number: "Il numero dell'edizione dell'articolo."
88
+ extent: "Una descrizione delle caratteristiche fisiche dell'articolo."
89
+ frequency: 'Frequenza di pubblicazione.'
90
+ preceded_by: 'Il titolo che ha preceduto questa pubblicazione.'
91
+ succeeded_by: 'Il titolo che seguì questa pubblicazione.'
92
+ publication_date: 'Data di pubblicazione (YYYY-MM-DD).'
93
+ publication_date_start: 'Data inizio di pubblicazione (YYYY-MM-DD).'
94
+ publication_date_end: 'Data fine di pubblicazione (YYYY-MM-DD).'
95
+ genre: 'Tipo di articolo di giornale.'
96
+ geographic_coverage: 'La posizione descritta in questo articolo.'
97
+ page_number: 'Le informazioni di impaginazione, come scritto nella pagina.'
98
+ text_direction: 'La direzione in cui viene letto il testo.'
99
+ section: "La sezione del giornale in cui appare l'articolo."
35
100
  labels:
36
101
  defaults:
37
102
  held_by: 'Tenendo posizione'
@@ -41,11 +106,36 @@ it:
41
106
  oclcnum: 'OCLC #'
42
107
  publication_date_end: 'Data di pubblicazione (fine)'
43
108
  publication_date_start: 'Data di pubblicazione (inizio)'
109
+ publication_date: 'Data di pubblicazione'
110
+ place_of_publication: 'Posizione di pubblicazione'
111
+ alternative_title: 'Titolo aggiuntivo'
112
+ edition_name: "Edizione nome"
113
+ edition_number: "Edizione numero"
114
+ volume: 'Volume'
115
+ issue_number: 'Edizione numero'
116
+ page_number: 'Impaginazione'
117
+ section: 'Sezione'
118
+ extent: "Caratteristiche fisiche"
119
+ frequency: 'Frequenza'
120
+ preceded_by: 'Titolo precedente'
121
+ succeeded_by: 'Titolo successivo'
122
+ genre: "Genere"
123
+ geographic_coverage: "Luoghi descritti"
124
+ author: 'Autore'
125
+ photographer: 'Fotografo'
126
+ height: 'Altezza'
127
+ width: 'Larghezza'
128
+ text_direction: 'Direzione testo'
44
129
 
45
130
  blacklight:
46
131
  search:
47
132
  fields:
48
133
  all_text_tsimv: 'Corrispondenze di parole chiave'
134
+ place_of_publication_label_tesim: 'Posizione di pubblicazione'
135
+ publication_title_ssi: 'Giornale'
136
+ publication_date_dtsi: 'Data di pubblicazione'
137
+ publication_date_start_dtsi: 'Data di pubblicazione (inizio)'
138
+ publication_date_end_dtsi: 'Data di pubblicazione (fine)'
49
139
  results:
50
140
  snippets:
51
141
  less: '<< meno'
@@ -2,6 +2,58 @@
2
2
  pt-BR:
3
3
  newspaper_works:
4
4
  attributes:
5
+ held_by:
6
+ label: 'Segurando localização'
7
+ identifier:
8
+ label: 'Identificador (local)'
9
+ issn:
10
+ label: 'ISSN'
11
+ lccn:
12
+ label: 'LCCN'
13
+ oclcnum:
14
+ label: 'OCLC #'
15
+ publication_date_end:
16
+ label: 'Data de publicação (fim)'
17
+ publication_date_start:
18
+ label: 'Data de publicação (começo)'
19
+ publication_date:
20
+ label: 'Data de publicação'
21
+ alternative_title:
22
+ label: 'Título adicional'
23
+ edition_name:
24
+ label: 'Nome da edição'
25
+ edition_number:
26
+ label: 'Número da edição'
27
+ volume:
28
+ label: 'Volume'
29
+ issue_number:
30
+ label: 'Número da edição'
31
+ page_number:
32
+ label: 'Paginação'
33
+ section:
34
+ label: 'Seção'
35
+ extent:
36
+ label: 'Características físicas'
37
+ frequency:
38
+ label: 'Frequência'
39
+ preceded_by:
40
+ label: 'Título precedente'
41
+ succeeded_by:
42
+ label: 'Título sucessor'
43
+ genre:
44
+ label: 'Gênero'
45
+ geographic_coverage:
46
+ label: 'Locais descritos'
47
+ author:
48
+ label: 'Autor'
49
+ photographer:
50
+ label: 'Fotógrafo'
51
+ height:
52
+ label: 'Altura'
53
+ width:
54
+ label: 'Largura'
55
+ text_direction:
56
+ label: 'Direção texto'
5
57
  place_of_publication:
6
58
  label: 'Local de publicação'
7
59
  publication_title:
@@ -32,6 +84,25 @@ pt-BR:
32
84
  front_pages_link: 'Ver todas as páginas principais'
33
85
 
34
86
  simple_form:
87
+ hints:
88
+ defaults:
89
+ held_by: 'O nome da biblioteca ou organização que contém o item original.'
90
+ place_of_publication: 'O local em que o item foi publicado.'
91
+ alternative_title: 'Um título ou legenda adicional para o item.'
92
+ edition_name: 'O nome da edição do item.'
93
+ edition_number: "O número da edição do item."
94
+ extent: 'Uma descrição das características físicas do item.'
95
+ frequency: 'Frequência de publicação.'
96
+ preceded_by: 'O título que precedeu esta publicação.'
97
+ succeeded_by: 'O título que se seguiu a esta publicação.'
98
+ publication_date: 'Data de publicação (YYYY-MM-DD).'
99
+ publication_date_start: 'Data começo de publicação (YYYY-MM-DD).'
100
+ publication_date_end: 'Data fim de publicação (YYYY-MM-DD).'
101
+ genre: 'Tipo de artigo de jornal.'
102
+ geographic_coverage: 'O local descrito neste artigo.'
103
+ page_number: 'As informações de paginação, conforme gravadas na página.'
104
+ text_direction: 'A direção na qual o texto é lido.'
105
+ section: 'A seção do jornal em que o item aparece.'
35
106
  labels:
36
107
  defaults:
37
108
  held_by: 'Segurando localização'
@@ -41,11 +112,36 @@ pt-BR:
41
112
  oclcnum: 'OCLC #'
42
113
  publication_date_end: 'Data de publicação (fim)'
43
114
  publication_date_start: 'Data de publicação (começo)'
115
+ publication_date: 'Data de publicação'
116
+ place_of_publication: 'Local de publicação'
117
+ alternative_title: 'Título adicional'
118
+ edition_name: "Nome da edição"
119
+ edition_number: "Número da edição"
120
+ volume: 'Volume'
121
+ issue_number: 'Número da edição'
122
+ page_number: 'Paginação'
123
+ section: 'Seção'
124
+ extent: "Características físicas"
125
+ frequency: 'Frequência'
126
+ preceded_by: 'Título precedente'
127
+ succeeded_by: 'Título sucessor'
128
+ genre: "Gênero"
129
+ geographic_coverage: "Locais descritos"
130
+ author: 'Autor'
131
+ photographer: 'Fotógrafo'
132
+ height: 'Altura'
133
+ width: 'Largura'
134
+ text_direction: 'Direção texto'
44
135
 
45
136
  blacklight:
46
137
  search:
47
138
  fields:
48
139
  all_text_tsimv: 'Correspondências de palavras-chave'
140
+ place_of_publication_label_tesim: 'Local de publicação'
141
+ publication_title_ssi: 'Jornal'
142
+ publication_date_dtsi: 'Data de publicação'
143
+ publication_date_start_dtsi: 'Data de publicação (começo)'
144
+ publication_date_end_dtsi: 'Data de publicação (fim)'
49
145
  results:
50
146
  snippets:
51
147
  less: '<< menos'
@@ -2,6 +2,52 @@
2
2
  zh:
3
3
  newspaper_works:
4
4
  attributes:
5
+ alternative_title:
6
+ label: '替代标题'
7
+ genre:
8
+ label: '类型'
9
+ held_by:
10
+ label: '持有位置'
11
+ text_direction:
12
+ label: '文字方向'
13
+ page_number:
14
+ label: '页码'
15
+ section:
16
+ label: '部分'
17
+ author:
18
+ label: '作者'
19
+ photographer:
20
+ label: '摄影家'
21
+ volume:
22
+ label: '卷'
23
+ issue_number:
24
+ label: '发行数量'
25
+ edition_name:
26
+ label: '项目版本的名称'
27
+ edition_number:
28
+ label: '项目的版本号'
29
+ geographic_coverage:
30
+ label: '所描述的地方'
31
+ extent:
32
+ label: '物理描述'
33
+ publication_date:
34
+ label: '出版日期'
35
+ publication_date_start:
36
+ label: '出版日期(开始)'
37
+ publication_date_end:
38
+ label: '出版日期(结束)'
39
+ frequency:
40
+ label: '发布频率'
41
+ preceded_by:
42
+ label: '前一个标题'
43
+ succeeded_by:
44
+ label: '后续标题'
45
+ issn:
46
+ label: 'ISSN'
47
+ lccn:
48
+ label: 'LCCN'
49
+ oclcnum:
50
+ label: 'OCLC #'
5
51
  place_of_publication:
6
52
  label: '出版地'
7
53
  publication_title:
@@ -32,6 +78,25 @@ zh:
32
78
  front_pages_link: '查看所有头版'
33
79
 
34
80
  simple_form:
81
+ hints:
82
+ defaults:
83
+ held_by: '拥有原始项目的图书馆或组织的名称。'
84
+ place_of_publication: '项目发布的位置。'
85
+ alternative_title: '项目的其他标题或副标题。'
86
+ edition_name: '项目版本的名称。'
87
+ edition_number: "项目的版本号。"
88
+ extent: '项目实际范围的描述。'
89
+ frequency: '发布频率。'
90
+ preceded_by: '本出版物之前的标题。'
91
+ succeeded_by: '该出版物之后的标题。'
92
+ publication_date: '发布日期 (YYYY-MM-DD).'
93
+ publication_date_start: '出版日期(开始) (YYYY-MM-DD).'
94
+ publication_date_end: '出版日期(结束) (YYYY-MM-DD).'
95
+ genre: '报纸文章的类型。'
96
+ geographic_coverage: '本文中描述的位置。'
97
+ page_number: '分页信息写在页面上。'
98
+ text_direction: '文本阅读的方向。'
99
+ section: '出现项目的报纸部分。'
35
100
  labels:
36
101
  defaults:
37
102
  held_by: '持有位置'
@@ -41,11 +106,36 @@ zh:
41
106
  oclcnum: 'OCLC #'
42
107
  publication_date_end: '出版日期(结束)'
43
108
  publication_date_start: '出版日期(开始)'
109
+ publication_date: '出版日期'
110
+ place_of_publication: '项目发布的位置'
111
+ alternative_title: '替代标题'
112
+ edition_name: "项目版本的名称"
113
+ edition_number: "项目的版本号"
114
+ volume: '卷'
115
+ issue_number: '发行数量'
116
+ page_number: '页码'
117
+ section: '部分'
118
+ extent: "物理描述"
119
+ frequency: '发布频率'
120
+ preceded_by: '前一个标题'
121
+ succeeded_by: '后续标题'
122
+ genre: "类型"
123
+ geographic_coverage: "所描述的地方"
124
+ author: '作者'
125
+ photographer: '摄影家'
126
+ height: '高度'
127
+ width: '宽度'
128
+ text_direction: '文字方向'
44
129
 
45
130
  blacklight:
46
131
  search:
47
132
  fields:
48
133
  all_text_tsimv: '关键字匹配'
134
+ place_of_publication_label_tesim: '项目发布的位置'
135
+ publication_title_ssi: '报纸'
136
+ publication_date_dtsi: '出版日期'
137
+ publication_date_start_dtsi: '出版日期(开始)'
138
+ publication_date_end_dtsi: '出版日期(结束)'
49
139
  results:
50
140
  snippets:
51
141
  less: '<< 减'