newspaper_works 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.coveralls.yml +2 -0
- data/.gitignore +4 -0
- data/.travis.yml +2 -2
- data/README.md +14 -13
- data/app/services/newspaper_works/jp2_derivative_service.rb +1 -3
- data/app/services/newspaper_works/newspaper_page_derivative_service.rb +37 -15
- data/app/services/newspaper_works/pdf_derivative_service.rb +4 -7
- data/app/services/newspaper_works/tiff_derivative_service.rb +5 -9
- data/app/views/newspaper_works/base/_attribute_rows.html.erb +72 -24
- data/config/locales/newspaper_article.de.yml +1 -1
- data/config/locales/newspaper_article.en.yml +1 -1
- data/config/locales/newspaper_article.es.yml +1 -1
- data/config/locales/newspaper_article.fr.yml +1 -1
- data/config/locales/newspaper_article.it.yml +1 -1
- data/config/locales/newspaper_article.pt-BR.yml +1 -1
- data/config/locales/newspaper_article.zh.yml +1 -1
- data/config/locales/newspaper_container.de.yml +1 -1
- data/config/locales/newspaper_container.en.yml +1 -1
- data/config/locales/newspaper_container.es.yml +1 -1
- data/config/locales/newspaper_container.fr.yml +1 -1
- data/config/locales/newspaper_container.it.yml +1 -1
- data/config/locales/newspaper_container.pt-BR.yml +1 -1
- data/config/locales/newspaper_container.zh.yml +1 -1
- data/config/locales/newspaper_issue.de.yml +1 -1
- data/config/locales/newspaper_issue.en.yml +1 -1
- data/config/locales/newspaper_issue.es.yml +1 -1
- data/config/locales/newspaper_issue.fr.yml +1 -1
- data/config/locales/newspaper_issue.it.yml +2 -2
- data/config/locales/newspaper_issue.pt-BR.yml +2 -2
- data/config/locales/newspaper_issue.zh.yml +2 -2
- data/config/locales/newspaper_page.de.yml +1 -1
- data/config/locales/newspaper_page.en.yml +1 -1
- data/config/locales/newspaper_page.es.yml +1 -1
- data/config/locales/newspaper_page.fr.yml +1 -1
- data/config/locales/newspaper_page.it.yml +1 -1
- data/config/locales/newspaper_page.pt-BR.yml +1 -1
- data/config/locales/newspaper_page.zh.yml +1 -1
- data/config/locales/newspaper_title.de.yml +1 -1
- data/config/locales/newspaper_title.en.yml +1 -1
- data/config/locales/newspaper_title.es.yml +1 -1
- data/config/locales/newspaper_title.fr.yml +1 -1
- data/config/locales/newspaper_title.it.yml +1 -1
- data/config/locales/newspaper_title.pt-BR.yml +1 -1
- data/config/locales/newspaper_title.zh.yml +1 -1
- data/config/locales/newspaper_works.de.yml +98 -0
- data/config/locales/newspaper_works.en.yml +67 -0
- data/config/locales/newspaper_works.es.yml +96 -0
- data/config/locales/newspaper_works.fr.yml +97 -0
- data/config/locales/newspaper_works.it.yml +90 -0
- data/config/locales/newspaper_works.pt-BR.yml +96 -0
- data/config/locales/newspaper_works.zh.yml +90 -0
- data/config/vendor/fits.xml +55 -0
- data/config/vendor/imagemagick-6-policy.xml +39 -39
- data/lib/newspaper_works.rb +2 -0
- data/lib/newspaper_works/image_tool.rb +119 -0
- data/lib/newspaper_works/jp2_image_metadata.rb +81 -0
- data/lib/newspaper_works/text_extraction.rb +1 -0
- data/lib/newspaper_works/text_extraction/hocr_reader.rb +173 -0
- data/lib/newspaper_works/text_extraction/page_ocr.rb +37 -51
- data/lib/newspaper_works/text_extraction/render_alto.rb +4 -4
- data/lib/newspaper_works/version.rb +1 -1
- data/newspaper_works.gemspec +2 -3
- data/spec/features/search_results_thumbnail_highlights_spec.rb +1 -1
- data/spec/fixtures/files/ocr_mono_text_hocr.html +78 -0
- data/spec/lib/newspaper_works/image_tool_spec.rb +109 -0
- data/spec/lib/newspaper_works/ingest/ingest_shared.rb +3 -3
- data/spec/lib/newspaper_works/ingest/newspaper_page_ingest_spec.rb +2 -2
- data/spec/lib/newspaper_works/jp2_image_metadata_spec.rb +37 -0
- data/spec/lib/newspaper_works/text_extraction/hocr_reader_spec.rb +45 -0
- data/spec/lib/newspaper_works/text_extraction/page_ocr_spec.rb +3 -3
- data/spec/lib/newspaper_works/text_extraction/render_alto_spec.rb +14 -14
- data/spec/services/newspaper_works/jp2_derivative_service_spec.rb +10 -13
- data/spec/services/newspaper_works/newspaper_page_derivative_service_spec.rb +10 -8
- data/spec/services/newspaper_works/pdf_derivative_service_spec.rb +11 -7
- data/spec/services/newspaper_works/tiff_derivative_service_spec.rb +17 -10
- data/spec/spec_helper.rb +19 -0
- metadata +21 -22
@@ -2,8 +2,62 @@
|
|
2
2
|
de:
|
3
3
|
newspaper_works:
|
4
4
|
attributes:
|
5
|
+
held_by:
|
6
|
+
label: 'Halteort'
|
7
|
+
identifier:
|
8
|
+
label: 'Kennung (lokal)'
|
9
|
+
issn:
|
10
|
+
label: 'ISSN'
|
11
|
+
lccn:
|
12
|
+
label: 'LCCN'
|
13
|
+
oclcnum:
|
14
|
+
label: 'OCLC #'
|
15
|
+
publication_date_end:
|
16
|
+
label: 'Erscheinungsdatum (Ende)'
|
17
|
+
publication_date_start:
|
18
|
+
label: 'Erscheinungsdatum (Beginn)'
|
19
|
+
publication_date:
|
20
|
+
label: 'Erscheinungsdatum'
|
21
|
+
alternative_title:
|
22
|
+
label: 'Zusätzlicher Titel'
|
23
|
+
edition_name:
|
24
|
+
label: 'Name der Ausgabe'
|
25
|
+
edition_number:
|
26
|
+
label: 'Nummer der Ausgabe'
|
27
|
+
volume:
|
28
|
+
label: 'Volumen'
|
29
|
+
issue_number:
|
30
|
+
label: 'Ausgabe Nummer'
|
31
|
+
page_number:
|
32
|
+
label: 'Seitennummer'
|
33
|
+
section:
|
34
|
+
label: 'Abschnitt'
|
35
|
+
extent:
|
36
|
+
label: 'Physikalische Eigenschaften'
|
37
|
+
frequency:
|
38
|
+
label: 'Häufigkeit'
|
39
|
+
preceded_by:
|
40
|
+
label: 'Vorhergehender Titel'
|
41
|
+
succeeded_by:
|
42
|
+
label: 'Nachfolgender Titel'
|
43
|
+
genre:
|
44
|
+
label: 'Genre'
|
45
|
+
geographic_coverage:
|
46
|
+
label: 'Orte beschrieben'
|
47
|
+
author:
|
48
|
+
label: 'Autor'
|
49
|
+
photographer:
|
50
|
+
label: 'Fotograf'
|
51
|
+
height:
|
52
|
+
label: 'Höhe'
|
53
|
+
width:
|
54
|
+
label: 'Breite'
|
55
|
+
text_direction:
|
56
|
+
label: 'Textrichtung'
|
5
57
|
place_of_publication:
|
6
58
|
label: 'Ort der Veröffentlichung'
|
59
|
+
publication_title:
|
60
|
+
label: 'Zeitung'
|
7
61
|
newspapers_search:
|
8
62
|
title: 'Suche nach Zeitungen'
|
9
63
|
text: 'Verwenden Sie dieses Formular für die Suche nach Zeitungsinhalten im Volltext.'
|
@@ -30,6 +84,25 @@ de:
|
|
30
84
|
front_pages_link: 'Alle Titelseiten anzeigen'
|
31
85
|
|
32
86
|
simple_form:
|
87
|
+
hints:
|
88
|
+
defaults:
|
89
|
+
held_by: 'Der Name der Bibliothek oder Organisation, in der sich das Originalelement befindet.'
|
90
|
+
place_of_publication: 'Der Ort, an dem der Artikel veröffentlicht wurde.'
|
91
|
+
alternative_title: 'Ein zusätzlicher Titel oder Untertitel für den Artikel.'
|
92
|
+
edition_name: 'Der Name der Ausgabe des Artikels.'
|
93
|
+
edition_number: "Die Nummer der Ausgabe des Artikels."
|
94
|
+
extent: 'Eine Beschreibung der physikalischen Eigenschaften des Artikels.'
|
95
|
+
frequency: 'Häufigkeit der Veröffentlichung.'
|
96
|
+
preceded_by: 'Der Titel vor dieser Veröffentlichung.'
|
97
|
+
succeeded_by: 'Der Titel, der auf diese Veröffentlichung folgte.'
|
98
|
+
publication_date: 'Veröffentlichungsdatum (YYYY-MM-DD).'
|
99
|
+
publication_date_start: 'Erscheinungsdatum Beginn (YYYY-MM-DD).'
|
100
|
+
publication_date_end: 'Erscheinungsdatum Ende (YYYY-MM-DD).'
|
101
|
+
genre: 'Art des Zeitungsartikels.'
|
102
|
+
geographic_coverage: 'Der in diesem Artikel beschriebene Ort.'
|
103
|
+
page_number: 'Die Paginierungsinformationen, wie auf der Seite geschrieben.'
|
104
|
+
text_direction: 'Die Richtung, in die der Text gelesen wird.'
|
105
|
+
section: 'Der Abschnitt der Zeitung, in dem der Artikel erscheint.'
|
33
106
|
labels:
|
34
107
|
defaults:
|
35
108
|
held_by: 'Halteort'
|
@@ -39,11 +112,36 @@ de:
|
|
39
112
|
oclcnum: 'OCLC #'
|
40
113
|
publication_date_end: 'Erscheinungsdatum (Ende)'
|
41
114
|
publication_date_start: 'Erscheinungsdatum (Beginn)'
|
115
|
+
publication_date: 'Erscheinungsdatum'
|
116
|
+
place_of_publication: 'Ort der Veröffentlichung'
|
117
|
+
alternative_title: 'Zusätzlicher Titel'
|
118
|
+
edition_name: "Name der Ausgabe"
|
119
|
+
edition_number: "Nummer der Ausgabe"
|
120
|
+
volume: 'Volumen'
|
121
|
+
issue_number: 'Ausgabe Nummer'
|
122
|
+
page_number: 'Seitennummer'
|
123
|
+
section: 'Abschnitt'
|
124
|
+
extent: "Physikalische Eigenschaften"
|
125
|
+
frequency: 'Häufigkeit'
|
126
|
+
preceded_by: 'Vorhergehender Titel'
|
127
|
+
succeeded_by: 'Nachfolgender Titel'
|
128
|
+
genre: "Genre"
|
129
|
+
geographic_coverage: "Orte beschrieben"
|
130
|
+
author: 'Autor'
|
131
|
+
photographer: 'Fotograf'
|
132
|
+
height: 'Höhe'
|
133
|
+
width: 'Breite'
|
134
|
+
text_direction: 'Textrichtung'
|
42
135
|
|
43
136
|
blacklight:
|
44
137
|
search:
|
45
138
|
fields:
|
46
139
|
all_text_tsimv: 'Keyword-Übereinstimmungen'
|
140
|
+
place_of_publication_label_tesim: 'Ort der Veröffentlichung'
|
141
|
+
publication_title_ssi: 'Zeitung'
|
142
|
+
publication_date_dtsi: 'Erscheinungsdatum'
|
143
|
+
publication_date_start_dtsi: 'Erscheinungsdatum (Beginn)'
|
144
|
+
publication_date_end_dtsi: 'Erscheinungsdatum (Ende)'
|
47
145
|
results:
|
48
146
|
snippets:
|
49
147
|
less: '<< weniger'
|
@@ -2,6 +2,52 @@
|
|
2
2
|
en:
|
3
3
|
newspaper_works:
|
4
4
|
attributes:
|
5
|
+
alternative_title:
|
6
|
+
label: 'Alternative title'
|
7
|
+
genre:
|
8
|
+
label: 'Article type'
|
9
|
+
held_by:
|
10
|
+
label: 'Held by'
|
11
|
+
text_direction:
|
12
|
+
label: 'Text direction'
|
13
|
+
page_number:
|
14
|
+
label: 'Page'
|
15
|
+
section:
|
16
|
+
label: 'Section'
|
17
|
+
author:
|
18
|
+
label: 'Author'
|
19
|
+
photographer:
|
20
|
+
label: 'Photographer'
|
21
|
+
volume:
|
22
|
+
label: 'Volume'
|
23
|
+
issue_number:
|
24
|
+
label: 'Issue'
|
25
|
+
edition_name:
|
26
|
+
label: 'Edition'
|
27
|
+
edition_number:
|
28
|
+
label: 'Edition #'
|
29
|
+
geographic_coverage:
|
30
|
+
label: 'Coverage'
|
31
|
+
extent:
|
32
|
+
label: 'Extent'
|
33
|
+
publication_date:
|
34
|
+
label: 'Publication date'
|
35
|
+
publication_date_start:
|
36
|
+
label: 'Publication date (start)'
|
37
|
+
publication_date_end:
|
38
|
+
label: 'Publication date (end)'
|
39
|
+
frequency:
|
40
|
+
label: 'Frequency of publication'
|
41
|
+
preceded_by:
|
42
|
+
label: 'Preceded by'
|
43
|
+
succeeded_by:
|
44
|
+
label: 'Succeeded by'
|
45
|
+
issn:
|
46
|
+
label: 'ISSN'
|
47
|
+
lccn:
|
48
|
+
label: 'LCCN'
|
49
|
+
oclcnum:
|
50
|
+
label: 'OCLC #'
|
5
51
|
place_of_publication:
|
6
52
|
label: 'Place of publication'
|
7
53
|
publication_title:
|
@@ -32,6 +78,27 @@ en:
|
|
32
78
|
front_pages_link: 'View all front pages'
|
33
79
|
|
34
80
|
simple_form:
|
81
|
+
hints:
|
82
|
+
defaults:
|
83
|
+
held_by: 'The name of the library or organization that holds the original item.'
|
84
|
+
place_of_publication: 'The location where the item was published.'
|
85
|
+
alternative_title: 'An additional title or subtitle for the item.'
|
86
|
+
edition_name: 'The name of the edition of the item (Evening, Sunday, etc.).'
|
87
|
+
edition_number: "The number of the edition of the item (should be numeric, use '1' as default)."
|
88
|
+
extent: 'A description of the physical extent of the item (number of pages, size, materials, etc.).'
|
89
|
+
frequency: 'The frequency of publication (Daily, Weekly, etc.).'
|
90
|
+
preceded_by: 'The title that preceded this publication.'
|
91
|
+
succeeded_by: 'The title that followed this publication.'
|
92
|
+
publication_date: 'The date of publication (use YYYY-MM-DD format).'
|
93
|
+
publication_date_start: 'The initial date of publication (use YYYY-MM-DD format).'
|
94
|
+
publication_date_end: 'The final date of publication (use YYYY-MM-DD format).'
|
95
|
+
genre: 'Pre-defined categories to describe the article type. More than one may be selected.'
|
96
|
+
author: 'The name of the author of the article.'
|
97
|
+
photographer: 'The name of the photographer for photos accompanying the article.'
|
98
|
+
geographic_coverage: 'The location(s) described in the article.'
|
99
|
+
page_number: 'The pagination information, as written on the page.'
|
100
|
+
text_direction: 'The reading direction of the text. Enter as "ltr" (left-to-right) or "rtl" (right-to-left)'
|
101
|
+
section: 'The section of the newspaper the item appears in (Business, Sports, etc.)'
|
35
102
|
labels:
|
36
103
|
defaults:
|
37
104
|
held_by: 'Holding location'
|
@@ -2,6 +2,58 @@
|
|
2
2
|
es:
|
3
3
|
newspaper_works:
|
4
4
|
attributes:
|
5
|
+
held_by:
|
6
|
+
label: 'Ubicación de retención'
|
7
|
+
identifier:
|
8
|
+
label: 'Identificador (local)'
|
9
|
+
issn:
|
10
|
+
label: 'ISSN'
|
11
|
+
lccn:
|
12
|
+
label: 'LCCN'
|
13
|
+
oclcnum:
|
14
|
+
label: 'OCLC #'
|
15
|
+
publication_date_end:
|
16
|
+
label: 'Fecha de publicación (final)'
|
17
|
+
publication_date_start:
|
18
|
+
label: 'Fecha de publicación (inicio)'
|
19
|
+
publication_date:
|
20
|
+
label: 'Fecha de publicación'
|
21
|
+
alternative_title:
|
22
|
+
label: 'Título adicional'
|
23
|
+
edition_name:
|
24
|
+
label: 'Nombre de la edición'
|
25
|
+
edition_number:
|
26
|
+
label: 'Número de la edición'
|
27
|
+
volume:
|
28
|
+
label: 'Volumen'
|
29
|
+
issue_number:
|
30
|
+
label: 'Número de emisión'
|
31
|
+
page_number:
|
32
|
+
label: 'Paginación'
|
33
|
+
section:
|
34
|
+
label: 'Sección'
|
35
|
+
extent:
|
36
|
+
label: 'Características físicas'
|
37
|
+
frequency:
|
38
|
+
label: 'Frecuencia'
|
39
|
+
preceded_by:
|
40
|
+
label: 'Título anterior'
|
41
|
+
succeeded_by:
|
42
|
+
label: 'Título sucesivo'
|
43
|
+
genre:
|
44
|
+
label: 'Género'
|
45
|
+
geographic_coverage:
|
46
|
+
label: 'Lugares descritos'
|
47
|
+
author:
|
48
|
+
label: 'Autor'
|
49
|
+
photographer:
|
50
|
+
label: 'Fotógrafo'
|
51
|
+
height:
|
52
|
+
label: 'Altura'
|
53
|
+
width:
|
54
|
+
label: 'Anchura'
|
55
|
+
text_direction:
|
56
|
+
label: 'Dirección texto'
|
5
57
|
place_of_publication:
|
6
58
|
label: 'Lugar de publicación'
|
7
59
|
publication_title:
|
@@ -32,6 +84,25 @@ es:
|
|
32
84
|
front_pages_link: 'Ver todas las portadas'
|
33
85
|
|
34
86
|
simple_form:
|
87
|
+
hints:
|
88
|
+
defaults:
|
89
|
+
held_by: 'El nombre de la biblioteca u organización que contiene el elemento original.'
|
90
|
+
place_of_publication: 'La ubicación donde se publicó el artículo.'
|
91
|
+
alternative_title: 'Un título o subtítulo adicional para el artículo.'
|
92
|
+
edition_name: 'El nombre de la edición del artículo.'
|
93
|
+
edition_number: "El número de la edición del artículo."
|
94
|
+
extent: 'Una descripción de las características físicas del artículo.'
|
95
|
+
frequency: 'Frecuencia de publicación.'
|
96
|
+
preceded_by: 'El título que precedió a esta publicación.'
|
97
|
+
succeeded_by: 'El título que siguió a esta publicación.'
|
98
|
+
publication_date: 'Fecha de publicación (YYYY-MM-DD).'
|
99
|
+
publication_date_start: 'Fecha inicio de publicación (YYYY-MM-DD).'
|
100
|
+
publication_date_end: 'Fecha final de publicación (YYYY-MM-DD).'
|
101
|
+
genre: 'Tipo de artículo periodístico.'
|
102
|
+
geographic_coverage: 'La ubicación descrita en este artículo.'
|
103
|
+
page_number: 'La información de paginación, tal como está escrita en la página.'
|
104
|
+
text_direction: 'La dirección en la que se lee el texto.'
|
105
|
+
section: 'La sección del periódico en la que aparece el artículo.'
|
35
106
|
labels:
|
36
107
|
defaults:
|
37
108
|
held_by: 'Ubicación de retención'
|
@@ -41,11 +112,36 @@ es:
|
|
41
112
|
oclcnum: 'OCLC #'
|
42
113
|
publication_date_end: 'Fecha de publicación (final)'
|
43
114
|
publication_date_start: 'Fecha de publicación (inicio)'
|
115
|
+
publication_date: 'Fecha de publicación'
|
116
|
+
place_of_publication: 'Ubicación de publicación'
|
117
|
+
alternative_title: 'Título adicional'
|
118
|
+
edition_name: "Nombre de la edición"
|
119
|
+
edition_number: "Número de la edición"
|
120
|
+
volume: 'Volumen'
|
121
|
+
issue_number: 'Número de emisión'
|
122
|
+
page_number: 'Paginación'
|
123
|
+
section: 'Sección'
|
124
|
+
extent: "Características físicas"
|
125
|
+
frequency: 'Frecuencia'
|
126
|
+
preceded_by: 'Título anterior'
|
127
|
+
succeeded_by: 'Título sucesivo'
|
128
|
+
genre: "Género"
|
129
|
+
geographic_coverage: "Lugares descritos"
|
130
|
+
author: 'Autor'
|
131
|
+
photographer: 'Fotógrafo'
|
132
|
+
height: 'Altura'
|
133
|
+
width: 'Anchura'
|
134
|
+
text_direction: 'Dirección texto'
|
44
135
|
|
45
136
|
blacklight:
|
46
137
|
search:
|
47
138
|
fields:
|
48
139
|
all_text_tsimv: 'Coincidencias de palabras clave'
|
140
|
+
place_of_publication_label_tesim: 'Ubicación de publicación'
|
141
|
+
publication_title_ssi: 'Título de periódico'
|
142
|
+
publication_date_dtsi: 'Fecha de publicación'
|
143
|
+
publication_date_start_dtsi: 'Fecha de publicación (inicio)'
|
144
|
+
publication_date_end_dtsi: 'Fecha de publicación (final)'
|
49
145
|
results:
|
50
146
|
snippets:
|
51
147
|
less: '<< menos'
|