newspaper_works 0.1.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78) hide show
  1. checksums.yaml +5 -5
  2. data/.coveralls.yml +2 -0
  3. data/.gitignore +4 -0
  4. data/.travis.yml +2 -2
  5. data/README.md +14 -13
  6. data/app/services/newspaper_works/jp2_derivative_service.rb +1 -3
  7. data/app/services/newspaper_works/newspaper_page_derivative_service.rb +37 -15
  8. data/app/services/newspaper_works/pdf_derivative_service.rb +4 -7
  9. data/app/services/newspaper_works/tiff_derivative_service.rb +5 -9
  10. data/app/views/newspaper_works/base/_attribute_rows.html.erb +72 -24
  11. data/config/locales/newspaper_article.de.yml +1 -1
  12. data/config/locales/newspaper_article.en.yml +1 -1
  13. data/config/locales/newspaper_article.es.yml +1 -1
  14. data/config/locales/newspaper_article.fr.yml +1 -1
  15. data/config/locales/newspaper_article.it.yml +1 -1
  16. data/config/locales/newspaper_article.pt-BR.yml +1 -1
  17. data/config/locales/newspaper_article.zh.yml +1 -1
  18. data/config/locales/newspaper_container.de.yml +1 -1
  19. data/config/locales/newspaper_container.en.yml +1 -1
  20. data/config/locales/newspaper_container.es.yml +1 -1
  21. data/config/locales/newspaper_container.fr.yml +1 -1
  22. data/config/locales/newspaper_container.it.yml +1 -1
  23. data/config/locales/newspaper_container.pt-BR.yml +1 -1
  24. data/config/locales/newspaper_container.zh.yml +1 -1
  25. data/config/locales/newspaper_issue.de.yml +1 -1
  26. data/config/locales/newspaper_issue.en.yml +1 -1
  27. data/config/locales/newspaper_issue.es.yml +1 -1
  28. data/config/locales/newspaper_issue.fr.yml +1 -1
  29. data/config/locales/newspaper_issue.it.yml +2 -2
  30. data/config/locales/newspaper_issue.pt-BR.yml +2 -2
  31. data/config/locales/newspaper_issue.zh.yml +2 -2
  32. data/config/locales/newspaper_page.de.yml +1 -1
  33. data/config/locales/newspaper_page.en.yml +1 -1
  34. data/config/locales/newspaper_page.es.yml +1 -1
  35. data/config/locales/newspaper_page.fr.yml +1 -1
  36. data/config/locales/newspaper_page.it.yml +1 -1
  37. data/config/locales/newspaper_page.pt-BR.yml +1 -1
  38. data/config/locales/newspaper_page.zh.yml +1 -1
  39. data/config/locales/newspaper_title.de.yml +1 -1
  40. data/config/locales/newspaper_title.en.yml +1 -1
  41. data/config/locales/newspaper_title.es.yml +1 -1
  42. data/config/locales/newspaper_title.fr.yml +1 -1
  43. data/config/locales/newspaper_title.it.yml +1 -1
  44. data/config/locales/newspaper_title.pt-BR.yml +1 -1
  45. data/config/locales/newspaper_title.zh.yml +1 -1
  46. data/config/locales/newspaper_works.de.yml +98 -0
  47. data/config/locales/newspaper_works.en.yml +67 -0
  48. data/config/locales/newspaper_works.es.yml +96 -0
  49. data/config/locales/newspaper_works.fr.yml +97 -0
  50. data/config/locales/newspaper_works.it.yml +90 -0
  51. data/config/locales/newspaper_works.pt-BR.yml +96 -0
  52. data/config/locales/newspaper_works.zh.yml +90 -0
  53. data/config/vendor/fits.xml +55 -0
  54. data/config/vendor/imagemagick-6-policy.xml +39 -39
  55. data/lib/newspaper_works.rb +2 -0
  56. data/lib/newspaper_works/image_tool.rb +119 -0
  57. data/lib/newspaper_works/jp2_image_metadata.rb +81 -0
  58. data/lib/newspaper_works/text_extraction.rb +1 -0
  59. data/lib/newspaper_works/text_extraction/hocr_reader.rb +173 -0
  60. data/lib/newspaper_works/text_extraction/page_ocr.rb +37 -51
  61. data/lib/newspaper_works/text_extraction/render_alto.rb +4 -4
  62. data/lib/newspaper_works/version.rb +1 -1
  63. data/newspaper_works.gemspec +2 -3
  64. data/spec/features/search_results_thumbnail_highlights_spec.rb +1 -1
  65. data/spec/fixtures/files/ocr_mono_text_hocr.html +78 -0
  66. data/spec/lib/newspaper_works/image_tool_spec.rb +109 -0
  67. data/spec/lib/newspaper_works/ingest/ingest_shared.rb +3 -3
  68. data/spec/lib/newspaper_works/ingest/newspaper_page_ingest_spec.rb +2 -2
  69. data/spec/lib/newspaper_works/jp2_image_metadata_spec.rb +37 -0
  70. data/spec/lib/newspaper_works/text_extraction/hocr_reader_spec.rb +45 -0
  71. data/spec/lib/newspaper_works/text_extraction/page_ocr_spec.rb +3 -3
  72. data/spec/lib/newspaper_works/text_extraction/render_alto_spec.rb +14 -14
  73. data/spec/services/newspaper_works/jp2_derivative_service_spec.rb +10 -13
  74. data/spec/services/newspaper_works/newspaper_page_derivative_service_spec.rb +10 -8
  75. data/spec/services/newspaper_works/pdf_derivative_service_spec.rb +11 -7
  76. data/spec/services/newspaper_works/tiff_derivative_service_spec.rb +17 -10
  77. data/spec/spec_helper.rb +19 -0
  78. metadata +21 -22
@@ -2,6 +2,58 @@
2
2
  fr:
3
3
  newspaper_works:
4
4
  attributes:
5
+ held_by:
6
+ label: 'Lieu de détention'
7
+ identifier:
8
+ label: 'Identifiant (local)'
9
+ issn:
10
+ label: 'ISSN'
11
+ lccn:
12
+ label: 'LCCN'
13
+ oclcnum:
14
+ label: 'OCLC #'
15
+ publication_date_end:
16
+ label: 'Date de publication (fin)'
17
+ publication_date_start:
18
+ label: 'Date de publication (début)'
19
+ publication_date:
20
+ label: 'Date de publication'
21
+ alternative_title:
22
+ label: 'Titre supplémentaire'
23
+ edition_name:
24
+ label: "Nom de l'édition"
25
+ edition_number:
26
+ label: "Numéro de l'édition"
27
+ volume:
28
+ label: 'Volume'
29
+ issue_number:
30
+ label: 'Numéro de série'
31
+ page_number:
32
+ label: 'Pagination'
33
+ section:
34
+ label: 'Section'
35
+ extent:
36
+ label: 'Caractéristiques physiques'
37
+ frequency:
38
+ label: 'Fréquence'
39
+ preceded_by:
40
+ label: 'Titre précédent'
41
+ succeeded_by:
42
+ label: 'Titre suivant'
43
+ genre:
44
+ label: 'Genre'
45
+ geographic_coverage:
46
+ label: 'Lieux décrits'
47
+ author:
48
+ label: 'Auteur'
49
+ photographer:
50
+ label: 'Photographe'
51
+ height:
52
+ label: 'Hauteur'
53
+ width:
54
+ label: 'Largeur'
55
+ text_direction:
56
+ label: 'Direction du texte'
5
57
  place_of_publication:
6
58
  label: 'Place de publication'
7
59
  publication_title:
@@ -32,6 +84,25 @@ fr:
32
84
  front_pages_link: 'Voir toutes les premières pages'
33
85
 
34
86
  simple_form:
87
+ hints:
88
+ defaults:
89
+ held_by: "Nom de la bibliothèque ou de l'organisation qui contient l'élément d'origine."
90
+ place_of_publication: "L'emplacement où l'élément a été publié."
91
+ alternative_title: "Un titre ou un sous-titre supplémentaire pour l'élément."
92
+ edition_name: "Le nom de l'édition de l'élément."
93
+ edition_number: "Le numéro de l'édition de l'élément."
94
+ extent: "Une description des caractéristiques physiques de l'article."
95
+ frequency: 'Fréquence de publication.'
96
+ preceded_by: 'Le titre qui a précédé cette publication.'
97
+ succeeded_by: 'Le titre qui a suivi cette publication.'
98
+ publication_date: 'Date de publication (YYYY-MM-DD).'
99
+ publication_date_start: 'Date début de publication (YYYY-MM-DD).'
100
+ publication_date_end: 'Date fin de publication (YYYY-MM-DD).'
101
+ genre: "Type d'article de journal."
102
+ geographic_coverage: "L'emplacement décrit dans cet article."
103
+ page_number: "Les informations de pagination, telles qu'elles sont écrites sur la page."
104
+ text_direction: 'La direction dans laquelle le texte est lu.'
105
+ section: 'La section du journal dans laquelle l’article apparaît.'
35
106
  labels:
36
107
  defaults:
37
108
  held_by: 'Lieu de détention'
@@ -41,11 +112,37 @@ fr:
41
112
  oclcnum: 'OCLC #'
42
113
  publication_date_end: 'Date de publication (fin)'
43
114
  publication_date_start: 'Date de publication (début)'
115
+ publication_date: 'Date de publication'
116
+ place_of_publication: 'Emplacement de publication'
117
+ alternative_title: 'Titre supplémentaire'
118
+ edition_name: "Nom de l'édition"
119
+ edition_number: "Numéro de l'édition"
120
+ volume: 'Volume'
121
+ issue_number: 'Numéro de série'
122
+ page_number: 'Pagination'
123
+ section: 'Section'
124
+ extent: "Caractéristiques physiques"
125
+ frequency: 'Fréquence'
126
+ preceded_by: 'Titre précédent'
127
+ succeeded_by: 'Titre suivant'
128
+ genre: "Genre"
129
+ geographic_coverage: "Lieux décrits"
130
+ author: 'Auteur'
131
+ photographer: 'Photographe'
132
+ height: 'Hauteur'
133
+ width: 'Largeur'
134
+ text_direction: 'Direction du texte'
135
+
44
136
 
45
137
  blacklight:
46
138
  search:
47
139
  fields:
48
140
  all_text_tsimv: 'Correspondances de mots clés'
141
+ place_of_publication_label_tesim: 'Emplacement de publication'
142
+ publication_title_ssi: 'Journal'
143
+ publication_date_dtsi: 'Date de publication'
144
+ publication_date_start_dtsi: 'Date de publication (début)'
145
+ publication_date_end_dtsi: 'Date de publication (fin)'
49
146
  results:
50
147
  snippets:
51
148
  less: '<< moins'
@@ -2,6 +2,52 @@
2
2
  it:
3
3
  newspaper_works:
4
4
  attributes:
5
+ alternative_title:
6
+ label: 'Titolo aggiuntivo'
7
+ genre:
8
+ label: 'Genere'
9
+ held_by:
10
+ label: 'Tenendo posizione'
11
+ text_direction:
12
+ label: 'Direzione testo'
13
+ page_number:
14
+ label: 'Impaginazione'
15
+ section:
16
+ label: 'Sezione'
17
+ author:
18
+ label: 'Autore'
19
+ photographer:
20
+ label: 'Fotografo'
21
+ volume:
22
+ label: 'Volume'
23
+ issue_number:
24
+ label: 'Edizione numero'
25
+ edition_name:
26
+ label: 'Edizione nome'
27
+ edition_number:
28
+ label: 'Edizione numero'
29
+ geographic_coverage:
30
+ label: 'Luoghi descritti'
31
+ extent:
32
+ label: 'Caratteristiche fisiche'
33
+ publication_date:
34
+ label: 'Data di pubblicazione'
35
+ publication_date_start:
36
+ label: 'Data di pubblicazione (inizio)'
37
+ publication_date_end:
38
+ label: 'Data di pubblicazione (fine)'
39
+ frequency:
40
+ label: 'Frequenza'
41
+ preceded_by:
42
+ label: 'Titolo precedente'
43
+ succeeded_by:
44
+ label: 'Titolo successivo'
45
+ issn:
46
+ label: 'ISSN'
47
+ lccn:
48
+ label: 'LCCN'
49
+ oclcnum:
50
+ label: 'OCLC #'
5
51
  place_of_publication:
6
52
  label: 'Luogo di pubblicazione'
7
53
  publication_title:
@@ -32,6 +78,25 @@ it:
32
78
  front_pages_link: 'Visualizza tutte le prime pagine'
33
79
 
34
80
  simple_form:
81
+ hints:
82
+ defaults:
83
+ held_by: "Il nome della libreria o dell'organizzazione che contiene l'elemento originale."
84
+ place_of_publication: "La posizione in cui l'articolo è stato pubblicato."
85
+ alternative_title: "Un titolo o sottotitolo aggiuntivo per l'elemento."
86
+ edition_name: "Il nome dell'edizione dell'articolo."
87
+ edition_number: "Il numero dell'edizione dell'articolo."
88
+ extent: "Una descrizione delle caratteristiche fisiche dell'articolo."
89
+ frequency: 'Frequenza di pubblicazione.'
90
+ preceded_by: 'Il titolo che ha preceduto questa pubblicazione.'
91
+ succeeded_by: 'Il titolo che seguì questa pubblicazione.'
92
+ publication_date: 'Data di pubblicazione (YYYY-MM-DD).'
93
+ publication_date_start: 'Data inizio di pubblicazione (YYYY-MM-DD).'
94
+ publication_date_end: 'Data fine di pubblicazione (YYYY-MM-DD).'
95
+ genre: 'Tipo di articolo di giornale.'
96
+ geographic_coverage: 'La posizione descritta in questo articolo.'
97
+ page_number: 'Le informazioni di impaginazione, come scritto nella pagina.'
98
+ text_direction: 'La direzione in cui viene letto il testo.'
99
+ section: "La sezione del giornale in cui appare l'articolo."
35
100
  labels:
36
101
  defaults:
37
102
  held_by: 'Tenendo posizione'
@@ -41,11 +106,36 @@ it:
41
106
  oclcnum: 'OCLC #'
42
107
  publication_date_end: 'Data di pubblicazione (fine)'
43
108
  publication_date_start: 'Data di pubblicazione (inizio)'
109
+ publication_date: 'Data di pubblicazione'
110
+ place_of_publication: 'Posizione di pubblicazione'
111
+ alternative_title: 'Titolo aggiuntivo'
112
+ edition_name: "Edizione nome"
113
+ edition_number: "Edizione numero"
114
+ volume: 'Volume'
115
+ issue_number: 'Edizione numero'
116
+ page_number: 'Impaginazione'
117
+ section: 'Sezione'
118
+ extent: "Caratteristiche fisiche"
119
+ frequency: 'Frequenza'
120
+ preceded_by: 'Titolo precedente'
121
+ succeeded_by: 'Titolo successivo'
122
+ genre: "Genere"
123
+ geographic_coverage: "Luoghi descritti"
124
+ author: 'Autore'
125
+ photographer: 'Fotografo'
126
+ height: 'Altezza'
127
+ width: 'Larghezza'
128
+ text_direction: 'Direzione testo'
44
129
 
45
130
  blacklight:
46
131
  search:
47
132
  fields:
48
133
  all_text_tsimv: 'Corrispondenze di parole chiave'
134
+ place_of_publication_label_tesim: 'Posizione di pubblicazione'
135
+ publication_title_ssi: 'Giornale'
136
+ publication_date_dtsi: 'Data di pubblicazione'
137
+ publication_date_start_dtsi: 'Data di pubblicazione (inizio)'
138
+ publication_date_end_dtsi: 'Data di pubblicazione (fine)'
49
139
  results:
50
140
  snippets:
51
141
  less: '<< meno'
@@ -2,6 +2,58 @@
2
2
  pt-BR:
3
3
  newspaper_works:
4
4
  attributes:
5
+ held_by:
6
+ label: 'Segurando localização'
7
+ identifier:
8
+ label: 'Identificador (local)'
9
+ issn:
10
+ label: 'ISSN'
11
+ lccn:
12
+ label: 'LCCN'
13
+ oclcnum:
14
+ label: 'OCLC #'
15
+ publication_date_end:
16
+ label: 'Data de publicação (fim)'
17
+ publication_date_start:
18
+ label: 'Data de publicação (começo)'
19
+ publication_date:
20
+ label: 'Data de publicação'
21
+ alternative_title:
22
+ label: 'Título adicional'
23
+ edition_name:
24
+ label: 'Nome da edição'
25
+ edition_number:
26
+ label: 'Número da edição'
27
+ volume:
28
+ label: 'Volume'
29
+ issue_number:
30
+ label: 'Número da edição'
31
+ page_number:
32
+ label: 'Paginação'
33
+ section:
34
+ label: 'Seção'
35
+ extent:
36
+ label: 'Características físicas'
37
+ frequency:
38
+ label: 'Frequência'
39
+ preceded_by:
40
+ label: 'Título precedente'
41
+ succeeded_by:
42
+ label: 'Título sucessor'
43
+ genre:
44
+ label: 'Gênero'
45
+ geographic_coverage:
46
+ label: 'Locais descritos'
47
+ author:
48
+ label: 'Autor'
49
+ photographer:
50
+ label: 'Fotógrafo'
51
+ height:
52
+ label: 'Altura'
53
+ width:
54
+ label: 'Largura'
55
+ text_direction:
56
+ label: 'Direção texto'
5
57
  place_of_publication:
6
58
  label: 'Local de publicação'
7
59
  publication_title:
@@ -32,6 +84,25 @@ pt-BR:
32
84
  front_pages_link: 'Ver todas as páginas principais'
33
85
 
34
86
  simple_form:
87
+ hints:
88
+ defaults:
89
+ held_by: 'O nome da biblioteca ou organização que contém o item original.'
90
+ place_of_publication: 'O local em que o item foi publicado.'
91
+ alternative_title: 'Um título ou legenda adicional para o item.'
92
+ edition_name: 'O nome da edição do item.'
93
+ edition_number: "O número da edição do item."
94
+ extent: 'Uma descrição das características físicas do item.'
95
+ frequency: 'Frequência de publicação.'
96
+ preceded_by: 'O título que precedeu esta publicação.'
97
+ succeeded_by: 'O título que se seguiu a esta publicação.'
98
+ publication_date: 'Data de publicação (YYYY-MM-DD).'
99
+ publication_date_start: 'Data começo de publicação (YYYY-MM-DD).'
100
+ publication_date_end: 'Data fim de publicação (YYYY-MM-DD).'
101
+ genre: 'Tipo de artigo de jornal.'
102
+ geographic_coverage: 'O local descrito neste artigo.'
103
+ page_number: 'As informações de paginação, conforme gravadas na página.'
104
+ text_direction: 'A direção na qual o texto é lido.'
105
+ section: 'A seção do jornal em que o item aparece.'
35
106
  labels:
36
107
  defaults:
37
108
  held_by: 'Segurando localização'
@@ -41,11 +112,36 @@ pt-BR:
41
112
  oclcnum: 'OCLC #'
42
113
  publication_date_end: 'Data de publicação (fim)'
43
114
  publication_date_start: 'Data de publicação (começo)'
115
+ publication_date: 'Data de publicação'
116
+ place_of_publication: 'Local de publicação'
117
+ alternative_title: 'Título adicional'
118
+ edition_name: "Nome da edição"
119
+ edition_number: "Número da edição"
120
+ volume: 'Volume'
121
+ issue_number: 'Número da edição'
122
+ page_number: 'Paginação'
123
+ section: 'Seção'
124
+ extent: "Características físicas"
125
+ frequency: 'Frequência'
126
+ preceded_by: 'Título precedente'
127
+ succeeded_by: 'Título sucessor'
128
+ genre: "Gênero"
129
+ geographic_coverage: "Locais descritos"
130
+ author: 'Autor'
131
+ photographer: 'Fotógrafo'
132
+ height: 'Altura'
133
+ width: 'Largura'
134
+ text_direction: 'Direção texto'
44
135
 
45
136
  blacklight:
46
137
  search:
47
138
  fields:
48
139
  all_text_tsimv: 'Correspondências de palavras-chave'
140
+ place_of_publication_label_tesim: 'Local de publicação'
141
+ publication_title_ssi: 'Jornal'
142
+ publication_date_dtsi: 'Data de publicação'
143
+ publication_date_start_dtsi: 'Data de publicação (começo)'
144
+ publication_date_end_dtsi: 'Data de publicação (fim)'
49
145
  results:
50
146
  snippets:
51
147
  less: '<< menos'
@@ -2,6 +2,52 @@
2
2
  zh:
3
3
  newspaper_works:
4
4
  attributes:
5
+ alternative_title:
6
+ label: '替代标题'
7
+ genre:
8
+ label: '类型'
9
+ held_by:
10
+ label: '持有位置'
11
+ text_direction:
12
+ label: '文字方向'
13
+ page_number:
14
+ label: '页码'
15
+ section:
16
+ label: '部分'
17
+ author:
18
+ label: '作者'
19
+ photographer:
20
+ label: '摄影家'
21
+ volume:
22
+ label: '卷'
23
+ issue_number:
24
+ label: '发行数量'
25
+ edition_name:
26
+ label: '项目版本的名称'
27
+ edition_number:
28
+ label: '项目的版本号'
29
+ geographic_coverage:
30
+ label: '所描述的地方'
31
+ extent:
32
+ label: '物理描述'
33
+ publication_date:
34
+ label: '出版日期'
35
+ publication_date_start:
36
+ label: '出版日期(开始)'
37
+ publication_date_end:
38
+ label: '出版日期(结束)'
39
+ frequency:
40
+ label: '发布频率'
41
+ preceded_by:
42
+ label: '前一个标题'
43
+ succeeded_by:
44
+ label: '后一个标题'
45
+ issn:
46
+ label: 'ISSN'
47
+ lccn:
48
+ label: 'LCCN'
49
+ oclcnum:
50
+ label: 'OCLC #'
5
51
  place_of_publication:
6
52
  label: '出版地'
7
53
  publication_title:
@@ -32,6 +78,25 @@ zh:
32
78
  front_pages_link: '查看所有头版'
33
79
 
34
80
  simple_form:
81
+ hints:
82
+ defaults:
83
+ held_by: '拥有原始项目的图书馆或组织的名称。'
84
+ place_of_publication: '项目发布的位置。'
85
+ alternative_title: '项目的其他标题或副标题。'
86
+ edition_name: '项目版本的名称。'
87
+ edition_number: "项目的版本号。"
88
+ extent: '项目实际范围的描述。'
89
+ frequency: '发布频率。'
90
+ preceded_by: '本出版物之前的标题。'
91
+ succeeded_by: '该出版物之后的标题。'
92
+ publication_date: '发布日期 (YYYY-MM-DD).'
93
+ publication_date_start: '出版日期(开始) (YYYY-MM-DD).'
94
+ publication_date_end: '出版日期(结束) (YYYY-MM-DD).'
95
+ genre: '报纸文章的类型。'
96
+ geographic_coverage: '本文中描述的位置。'
97
+ page_number: '分页信息写在页面上。'
98
+ text_direction: '文本阅读的方向。'
99
+ section: '出现项目的报纸部分。'
35
100
  labels:
36
101
  defaults:
37
102
  held_by: '持有位置'
@@ -41,11 +106,36 @@ zh:
41
106
  oclcnum: 'OCLC #'
42
107
  publication_date_end: '出版日期(结束)'
43
108
  publication_date_start: '出版日期(开始)'
109
+ publication_date: '出版日期'
110
+ place_of_publication: '项目发布的位置'
111
+ alternative_title: '替代标题'
112
+ edition_name: "项目版本的名称"
113
+ edition_number: "项目的版本号"
114
+ volume: '卷'
115
+ issue_number: '发行数量'
116
+ page_number: '页码'
117
+ section: '部分'
118
+ extent: "物理描述"
119
+ frequency: '发布频率'
120
+ preceded_by: '前一个标题'
121
+ succeeded_by: '后一个标题'
122
+ genre: "类型"
123
+ geographic_coverage: "所描述的地方"
124
+ author: '作者'
125
+ photographer: '摄影家'
126
+ height: '高度'
127
+ width: '宽度'
128
+ text_direction: '文字方向'
44
129
 
45
130
  blacklight:
46
131
  search:
47
132
  fields:
48
133
  all_text_tsimv: '关键字匹配'
134
+ place_of_publication_label_tesim: '项目发布的位置'
135
+ publication_title_ssi: '报纸'
136
+ publication_date_dtsi: '出版日期'
137
+ publication_date_start_dtsi: '出版日期(开始)'
138
+ publication_date_end_dtsi: '出版日期(结束)'
49
139
  results:
50
140
  snippets:
51
141
  less: '<< 减'