newspaper_works 0.1.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.coveralls.yml +2 -0
- data/.gitignore +4 -0
- data/.travis.yml +2 -2
- data/README.md +14 -13
- data/app/services/newspaper_works/jp2_derivative_service.rb +1 -3
- data/app/services/newspaper_works/newspaper_page_derivative_service.rb +37 -15
- data/app/services/newspaper_works/pdf_derivative_service.rb +4 -7
- data/app/services/newspaper_works/tiff_derivative_service.rb +5 -9
- data/app/views/newspaper_works/base/_attribute_rows.html.erb +72 -24
- data/config/locales/newspaper_article.de.yml +1 -1
- data/config/locales/newspaper_article.en.yml +1 -1
- data/config/locales/newspaper_article.es.yml +1 -1
- data/config/locales/newspaper_article.fr.yml +1 -1
- data/config/locales/newspaper_article.it.yml +1 -1
- data/config/locales/newspaper_article.pt-BR.yml +1 -1
- data/config/locales/newspaper_article.zh.yml +1 -1
- data/config/locales/newspaper_container.de.yml +1 -1
- data/config/locales/newspaper_container.en.yml +1 -1
- data/config/locales/newspaper_container.es.yml +1 -1
- data/config/locales/newspaper_container.fr.yml +1 -1
- data/config/locales/newspaper_container.it.yml +1 -1
- data/config/locales/newspaper_container.pt-BR.yml +1 -1
- data/config/locales/newspaper_container.zh.yml +1 -1
- data/config/locales/newspaper_issue.de.yml +1 -1
- data/config/locales/newspaper_issue.en.yml +1 -1
- data/config/locales/newspaper_issue.es.yml +1 -1
- data/config/locales/newspaper_issue.fr.yml +1 -1
- data/config/locales/newspaper_issue.it.yml +2 -2
- data/config/locales/newspaper_issue.pt-BR.yml +2 -2
- data/config/locales/newspaper_issue.zh.yml +2 -2
- data/config/locales/newspaper_page.de.yml +1 -1
- data/config/locales/newspaper_page.en.yml +1 -1
- data/config/locales/newspaper_page.es.yml +1 -1
- data/config/locales/newspaper_page.fr.yml +1 -1
- data/config/locales/newspaper_page.it.yml +1 -1
- data/config/locales/newspaper_page.pt-BR.yml +1 -1
- data/config/locales/newspaper_page.zh.yml +1 -1
- data/config/locales/newspaper_title.de.yml +1 -1
- data/config/locales/newspaper_title.en.yml +1 -1
- data/config/locales/newspaper_title.es.yml +1 -1
- data/config/locales/newspaper_title.fr.yml +1 -1
- data/config/locales/newspaper_title.it.yml +1 -1
- data/config/locales/newspaper_title.pt-BR.yml +1 -1
- data/config/locales/newspaper_title.zh.yml +1 -1
- data/config/locales/newspaper_works.de.yml +98 -0
- data/config/locales/newspaper_works.en.yml +67 -0
- data/config/locales/newspaper_works.es.yml +96 -0
- data/config/locales/newspaper_works.fr.yml +97 -0
- data/config/locales/newspaper_works.it.yml +90 -0
- data/config/locales/newspaper_works.pt-BR.yml +96 -0
- data/config/locales/newspaper_works.zh.yml +90 -0
- data/config/vendor/fits.xml +55 -0
- data/config/vendor/imagemagick-6-policy.xml +39 -39
- data/lib/newspaper_works.rb +2 -0
- data/lib/newspaper_works/image_tool.rb +119 -0
- data/lib/newspaper_works/jp2_image_metadata.rb +81 -0
- data/lib/newspaper_works/text_extraction.rb +1 -0
- data/lib/newspaper_works/text_extraction/hocr_reader.rb +173 -0
- data/lib/newspaper_works/text_extraction/page_ocr.rb +37 -51
- data/lib/newspaper_works/text_extraction/render_alto.rb +4 -4
- data/lib/newspaper_works/version.rb +1 -1
- data/newspaper_works.gemspec +2 -3
- data/spec/features/search_results_thumbnail_highlights_spec.rb +1 -1
- data/spec/fixtures/files/ocr_mono_text_hocr.html +78 -0
- data/spec/lib/newspaper_works/image_tool_spec.rb +109 -0
- data/spec/lib/newspaper_works/ingest/ingest_shared.rb +3 -3
- data/spec/lib/newspaper_works/ingest/newspaper_page_ingest_spec.rb +2 -2
- data/spec/lib/newspaper_works/jp2_image_metadata_spec.rb +37 -0
- data/spec/lib/newspaper_works/text_extraction/hocr_reader_spec.rb +45 -0
- data/spec/lib/newspaper_works/text_extraction/page_ocr_spec.rb +3 -3
- data/spec/lib/newspaper_works/text_extraction/render_alto_spec.rb +14 -14
- data/spec/services/newspaper_works/jp2_derivative_service_spec.rb +10 -13
- data/spec/services/newspaper_works/newspaper_page_derivative_service_spec.rb +10 -8
- data/spec/services/newspaper_works/pdf_derivative_service_spec.rb +11 -7
- data/spec/services/newspaper_works/tiff_derivative_service_spec.rb +17 -10
- data/spec/spec_helper.rb +19 -0
- metadata +21 -22
@@ -2,8 +2,62 @@
|
|
2
2
|
de:
|
3
3
|
newspaper_works:
|
4
4
|
attributes:
|
5
|
+
held_by:
|
6
|
+
label: 'Halteort'
|
7
|
+
identifier:
|
8
|
+
label: 'Kennung (lokal)'
|
9
|
+
issn:
|
10
|
+
label: 'ISSN'
|
11
|
+
lccn:
|
12
|
+
label: 'LCCN'
|
13
|
+
oclcnum:
|
14
|
+
label: 'OCLC #'
|
15
|
+
publication_date_end:
|
16
|
+
label: 'Erscheinungsdatum (Ende)'
|
17
|
+
publication_date_start:
|
18
|
+
label: 'Erscheinungsdatum (Beginn)'
|
19
|
+
publication_date:
|
20
|
+
label: 'Erscheinungsdatum'
|
21
|
+
alternative_title:
|
22
|
+
label: 'Zusätzlicher Titel'
|
23
|
+
edition_name:
|
24
|
+
label: 'Name der Ausgabe'
|
25
|
+
edition_number:
|
26
|
+
label: 'Nummer der Ausgabe'
|
27
|
+
volume:
|
28
|
+
label: 'Volumen'
|
29
|
+
issue_number:
|
30
|
+
label: 'Ausgabe Nummer'
|
31
|
+
page_number:
|
32
|
+
label: 'Seitennummer'
|
33
|
+
section:
|
34
|
+
label: 'Abschnitt'
|
35
|
+
extent:
|
36
|
+
label: 'Physikalische Eigenschaften'
|
37
|
+
frequency:
|
38
|
+
label: 'Häufigkeit'
|
39
|
+
preceded_by:
|
40
|
+
label: 'Vorhergehender Titel'
|
41
|
+
succeeded_by:
|
42
|
+
label: 'Nachfolgender Titel'
|
43
|
+
genre:
|
44
|
+
label: 'Genre'
|
45
|
+
geographic_coverage:
|
46
|
+
label: 'Orte beschrieben'
|
47
|
+
author:
|
48
|
+
label: 'Autor'
|
49
|
+
photographer:
|
50
|
+
label: 'Fotograf'
|
51
|
+
height:
|
52
|
+
label: 'Höhe'
|
53
|
+
width:
|
54
|
+
label: 'Breite'
|
55
|
+
text_direction:
|
56
|
+
label: 'Textrichtung'
|
5
57
|
place_of_publication:
|
6
58
|
label: 'Ort der Veröffentlichung'
|
59
|
+
publication_title:
|
60
|
+
label: 'Zeitung'
|
7
61
|
newspapers_search:
|
8
62
|
title: 'Suche nach Zeitungen'
|
9
63
|
text: 'Verwenden Sie dieses Formular für die Suche nach Zeitungsinhalten im Volltext.'
|
@@ -30,6 +84,25 @@ de:
|
|
30
84
|
front_pages_link: 'Alle Titelseiten anzeigen'
|
31
85
|
|
32
86
|
simple_form:
|
87
|
+
hints:
|
88
|
+
defaults:
|
89
|
+
held_by: 'Der Name der Bibliothek oder Organisation, in der sich das Originalelement befindet.'
|
90
|
+
place_of_publication: 'Der Ort, an dem der Artikel veröffentlicht wurde.'
|
91
|
+
alternative_title: 'Ein zusätzlicher Titel oder Untertitel für den Artikel.'
|
92
|
+
edition_name: 'Der Name der Ausgabe des Artikels.'
|
93
|
+
edition_number: "Die Nummer der Ausgabe des Artikels."
|
94
|
+
extent: 'Eine Beschreibung der physikalischen Eigenschaften des Artikels.'
|
95
|
+
frequency: 'Häufigkeit der Veröffentlichung.'
|
96
|
+
preceded_by: 'Der Titel vor dieser Veröffentlichung.'
|
97
|
+
succeeded_by: 'Der Titel, der auf diese Veröffentlichung folgte.'
|
98
|
+
publication_date: 'Veröffentlichungsdatum (YYYY-MM-DD).'
|
99
|
+
publication_date_start: 'Erscheinungsdatum Beginn (YYYY-MM-DD).'
|
100
|
+
publication_date_end: 'Erscheinungsdatum Ende (YYYY-MM-DD).'
|
101
|
+
genre: 'Art des Zeitungsartikels.'
|
102
|
+
geographic_coverage: 'Der in diesem Artikel beschriebene Speicherort.'
|
103
|
+
page_number: 'Die Paginierungsinformationen, wie auf der Seite geschrieben.'
|
104
|
+
text_direction: 'Die Richtung, in die der Text gelesen wird.'
|
105
|
+
section: 'Der Abschnitt der Zeitung, in dem der Artikel erscheint.'
|
33
106
|
labels:
|
34
107
|
defaults:
|
35
108
|
held_by: 'Halteort'
|
@@ -39,11 +112,36 @@ de:
|
|
39
112
|
oclcnum: 'OCLC #'
|
40
113
|
publication_date_end: 'Erscheinungsdatum (Ende)'
|
41
114
|
publication_date_start: 'Erscheinungsdatum (Beginn)'
|
115
|
+
publication_date: 'Erscheinungsdatum'
|
116
|
+
place_of_publication: 'Ort der Veröffentlichung'
|
117
|
+
alternative_title: 'Zusätzlicher Titel'
|
118
|
+
edition_name: "Name der Ausgabe"
|
119
|
+
edition_number: "Nummer der Ausgabe"
|
120
|
+
volume: 'Volumen'
|
121
|
+
issue_number: 'Ausgabe Nummer'
|
122
|
+
page_number: 'Seitennummer'
|
123
|
+
section: 'Abschnitt'
|
124
|
+
extent: "Physikalische Eigenschaften"
|
125
|
+
frequency: 'Häufigkeit'
|
126
|
+
preceded_by: 'Vorhergehender Titel'
|
127
|
+
succeeded_by: 'Nachfolgender Titel'
|
128
|
+
genre: "Genre"
|
129
|
+
geographic_coverage: "Orte beschrieben"
|
130
|
+
author: 'Autor'
|
131
|
+
photographer: 'Fotograf'
|
132
|
+
height: 'Höhe'
|
133
|
+
width: 'Breite'
|
134
|
+
text_direction: 'Textrichtung'
|
42
135
|
|
43
136
|
blacklight:
|
44
137
|
search:
|
45
138
|
fields:
|
46
139
|
all_text_tsimv: 'Keyword-Übereinstimmungen'
|
140
|
+
place_of_publication_label_tesim: 'Ort der Veröffentlichung'
|
141
|
+
publication_title_ssi: 'Zeitung'
|
142
|
+
publication_date_dtsi: 'Erscheinungsdatum'
|
143
|
+
publication_date_start_dtsi: 'Erscheinungsdatum (Beginn)'
|
144
|
+
publication_date_end_dtsi: 'Erscheinungsdatum (Ende)'
|
47
145
|
results:
|
48
146
|
snippets:
|
49
147
|
less: '<< weniger'
|
@@ -2,6 +2,52 @@
|
|
2
2
|
en:
|
3
3
|
newspaper_works:
|
4
4
|
attributes:
|
5
|
+
alternative_title:
|
6
|
+
label: 'Alternative title'
|
7
|
+
genre:
|
8
|
+
label: 'Article type'
|
9
|
+
held_by:
|
10
|
+
label: 'Held by'
|
11
|
+
text_direction:
|
12
|
+
label: 'Text direction'
|
13
|
+
page_number:
|
14
|
+
label: 'Page'
|
15
|
+
section:
|
16
|
+
label: 'Section'
|
17
|
+
author:
|
18
|
+
label: 'Author'
|
19
|
+
photographer:
|
20
|
+
label: 'Photographer'
|
21
|
+
volume:
|
22
|
+
label: 'Volume'
|
23
|
+
issue_number:
|
24
|
+
label: 'Issue'
|
25
|
+
edition_name:
|
26
|
+
label: 'Edition'
|
27
|
+
edition_number:
|
28
|
+
label: 'Edition #'
|
29
|
+
geographic_coverage:
|
30
|
+
label: 'Coverage'
|
31
|
+
extent:
|
32
|
+
label: 'Extent'
|
33
|
+
publication_date:
|
34
|
+
label: 'Publication date'
|
35
|
+
publication_date_start:
|
36
|
+
label: 'Publication date (start)'
|
37
|
+
publication_date_end:
|
38
|
+
label: 'Publication date (end)'
|
39
|
+
frequency:
|
40
|
+
label: 'Frequency of publication'
|
41
|
+
preceded_by:
|
42
|
+
label: 'Preceded by'
|
43
|
+
succeeded_by:
|
44
|
+
label: 'Succeeded by'
|
45
|
+
issn:
|
46
|
+
label: 'ISSN'
|
47
|
+
lccn:
|
48
|
+
label: 'LCCN'
|
49
|
+
oclcnum:
|
50
|
+
label: 'OCLC #'
|
5
51
|
place_of_publication:
|
6
52
|
label: 'Place of publication'
|
7
53
|
publication_title:
|
@@ -32,6 +78,27 @@ en:
|
|
32
78
|
front_pages_link: 'View all front pages'
|
33
79
|
|
34
80
|
simple_form:
|
81
|
+
hints:
|
82
|
+
defaults:
|
83
|
+
held_by: 'The name of the library or organization that holds the original item.'
|
84
|
+
place_of_publication: 'The location where the item was published.'
|
85
|
+
alternative_title: 'An additional title or subtitle for the item.'
|
86
|
+
edition_name: 'The name of the edition of the item (Evening, Sunday, etc.).'
|
87
|
+
edition_number: "The number of the edition of the item (should be numeric, use '1' as default)."
|
88
|
+
extent: 'A description of the physical extent of the item (number of pages, size, materials, etc.).'
|
89
|
+
frequency: 'The frequency of publication (Daily, Weekly, etc.).'
|
90
|
+
preceded_by: 'The title that preceded this publication.'
|
91
|
+
succeeded_by: 'The title that followed this publication.'
|
92
|
+
publication_date: 'The date of publication (use YYYY-MM-DD format).'
|
93
|
+
publication_date_start: 'The initial date of publication (use YYYY-MM-DD format).'
|
94
|
+
publication_date_end: 'The final date of publication (use YYYY-MM-DD format).'
|
95
|
+
genre: 'Pre-defined categories to describe the article type. More than one may be selected.'
|
96
|
+
author: 'The name of the author of the article.'
|
97
|
+
photographer: 'The name of the photographer for photos accompanying the article.'
|
98
|
+
geographic_coverage: 'The location(s) described in the article.'
|
99
|
+
page_number: 'The pagination information, as written on the page.'
|
100
|
+
text_direction: 'The reading direction of the text. Enter as "ltr" (left-to-right) or "rtl" (right-to-left).'
|
101
|
+
section: 'The section of the newspaper the item appears in (Business, Sports, etc.)'
|
35
102
|
labels:
|
36
103
|
defaults:
|
37
104
|
held_by: 'Holding location'
|
@@ -2,6 +2,58 @@
|
|
2
2
|
es:
|
3
3
|
newspaper_works:
|
4
4
|
attributes:
|
5
|
+
held_by:
|
6
|
+
label: 'Ubicación de retención'
|
7
|
+
identifier:
|
8
|
+
label: 'Identificador (local)'
|
9
|
+
issn:
|
10
|
+
label: 'ISSN'
|
11
|
+
lccn:
|
12
|
+
label: 'LCCN'
|
13
|
+
oclcnum:
|
14
|
+
label: 'OCLC #'
|
15
|
+
publication_date_end:
|
16
|
+
label: 'Fecha de publicación (final)'
|
17
|
+
publication_date_start:
|
18
|
+
label: 'Fecha de publicación (inicio)'
|
19
|
+
publication_date:
|
20
|
+
label: 'Fecha de publicación'
|
21
|
+
alternative_title:
|
22
|
+
label: 'Título adicional'
|
23
|
+
edition_name:
|
24
|
+
label: 'Nombre de la edición'
|
25
|
+
edition_number:
|
26
|
+
label: 'Número de la edición'
|
27
|
+
volume:
|
28
|
+
label: 'Volumen'
|
29
|
+
issue_number:
|
30
|
+
label: 'Número de emisión'
|
31
|
+
page_number:
|
32
|
+
label: 'Paginación'
|
33
|
+
section:
|
34
|
+
label: 'Sección'
|
35
|
+
extent:
|
36
|
+
label: 'Características físicas'
|
37
|
+
frequency:
|
38
|
+
label: 'Frecuencia'
|
39
|
+
preceded_by:
|
40
|
+
label: 'Título anterior'
|
41
|
+
succeeded_by:
|
42
|
+
label: 'Título sucesivo'
|
43
|
+
genre:
|
44
|
+
label: 'Género'
|
45
|
+
geographic_coverage:
|
46
|
+
label: 'Lugares descritos'
|
47
|
+
author:
|
48
|
+
label: 'Autor'
|
49
|
+
photographer:
|
50
|
+
label: 'Fotógrafo'
|
51
|
+
height:
|
52
|
+
label: 'Altura'
|
53
|
+
width:
|
54
|
+
label: 'Anchura'
|
55
|
+
text_direction:
|
56
|
+
label: 'Dirección texto'
|
5
57
|
place_of_publication:
|
6
58
|
label: 'Lugar de publicación'
|
7
59
|
publication_title:
|
@@ -32,6 +84,25 @@ es:
|
|
32
84
|
front_pages_link: 'Ver todas las portadas'
|
33
85
|
|
34
86
|
simple_form:
|
87
|
+
hints:
|
88
|
+
defaults:
|
89
|
+
held_by: 'El nombre de la biblioteca u organización que contiene el elemento original.'
|
90
|
+
place_of_publication: 'La ubicación donde se publicó el artículo.'
|
91
|
+
alternative_title: 'Un título o subtítulo adicional para el artículo.'
|
92
|
+
edition_name: 'El nombre de la edición del artículo.'
|
93
|
+
edition_number: "El número de la edición del artículo."
|
94
|
+
extent: 'Una descripción de las características físicas del artículo.'
|
95
|
+
frequency: 'Frecuencia de publicación.'
|
96
|
+
preceded_by: 'El título que precedió a esta publicación.'
|
97
|
+
succeeded_by: 'El título que siguió a esta publicación.'
|
98
|
+
publication_date: 'Fecha de publicación (YYYY-MM-DD).'
|
99
|
+
publication_date_start: 'Fecha inicio de publicación (YYYY-MM-DD).'
|
100
|
+
publication_date_end: 'Fecha final de publicación (YYYY-MM-DD).'
|
101
|
+
genre: 'Tipo de artículo periodístico.'
|
102
|
+
geographic_coverage: 'La ubicación descrita en este artículo.'
|
103
|
+
page_number: 'La información de paginación, tal como está escrita en la página.'
|
104
|
+
text_direction: 'La dirección en la que se lee el texto.'
|
105
|
+
section: 'La sección del periódico en la que aparece el artículo.'
|
35
106
|
labels:
|
36
107
|
defaults:
|
37
108
|
held_by: 'Ubicación de retención'
|
@@ -41,11 +112,36 @@ es:
|
|
41
112
|
oclcnum: 'OCLC #'
|
42
113
|
publication_date_end: 'Fecha de publicación (final)'
|
43
114
|
publication_date_start: 'Fecha de publicación (inicio)'
|
115
|
+
publication_date: 'Fecha de publicación'
|
116
|
+
place_of_publication: 'Ubicación de publicación'
|
117
|
+
alternative_title: 'Título adicional'
|
118
|
+
edition_name: "Nombre de la edición"
|
119
|
+
edition_number: "Número de la edición"
|
120
|
+
volume: 'Volumen'
|
121
|
+
issue_number: 'Número de emisión'
|
122
|
+
page_number: 'Paginación'
|
123
|
+
section: 'Sección'
|
124
|
+
extent: "Características físicas"
|
125
|
+
frequency: 'Frecuencia'
|
126
|
+
preceded_by: 'Título anterior'
|
127
|
+
succeeded_by: 'Título sucesivo'
|
128
|
+
genre: "Género"
|
129
|
+
geographic_coverage: "Lugares descritos"
|
130
|
+
author: 'Autor'
|
131
|
+
photographer: 'Fotógrafo'
|
132
|
+
height: 'Altura'
|
133
|
+
width: 'Anchura'
|
134
|
+
text_direction: 'Dirección texto'
|
44
135
|
|
45
136
|
blacklight:
|
46
137
|
search:
|
47
138
|
fields:
|
48
139
|
all_text_tsimv: 'Coincidencias de palabras clave'
|
140
|
+
place_of_publication_label_tesim: 'Ubicación de publicación'
|
141
|
+
publication_title_ssi: 'Título de periódico'
|
142
|
+
publication_date_dtsi: 'Fecha de publicación'
|
143
|
+
publication_date_start_dtsi: 'Fecha de publicación (inicio)'
|
144
|
+
publication_date_end_dtsi: 'Fecha de publicación (final)'
|
49
145
|
results:
|
50
146
|
snippets:
|
51
147
|
less: '<< menos'
|