iiif_print 1.1.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/Gemfile.lock +2 -2
- data/README.md +4 -0
- data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +1 -1
- data/app/indexers/concerns/iiif_print/child_work_indexer.rb +27 -0
- data/app/indexers/concerns/iiif_print/file_set_indexer.rb +37 -22
- data/{lib → app/jobs}/iiif_print/jobs/application_job.rb +2 -1
- data/{lib → app/jobs}/iiif_print/jobs/child_works_from_pdf_job.rb +14 -9
- data/{lib → app/jobs}/iiif_print/jobs/create_relationships_job.rb +10 -20
- data/app/listeners/iiif_print/listener.rb +31 -0
- data/app/models/concerns/iiif_print/set_child_flag.rb +1 -1
- data/app/models/concerns/iiif_print/solr/document.rb +5 -3
- data/app/presenters/iiif_print/file_set_presenter_decorator.rb +11 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +1 -1
- data/app/presenters/iiif_print/work_show_presenter_decorator.rb +5 -2
- data/app/services/iiif_print/manifest_builder_service_behavior.rb +4 -2
- data/app/services/iiif_print/pluggable_derivative_service.rb +5 -1
- data/app/services/iiif_print/simple_schema_loader_decorator.rb +11 -0
- data/app/transactions/hyrax/transactions/iiif_print_container_decorator.rb +34 -0
- data/app/transactions/hyrax/transactions/steps/conditionally_destroy_children_from_split.rb +32 -0
- data/app/transactions/hyrax/transactions/steps/delete_all_file_sets_decorator.rb +35 -0
- data/app/views/hyrax/file_sets/_show_actions.html.erb +1 -1
- data/config/initializers/simple_schema_loader.rb +1 -0
- data/config/metadata/child_works_from_pdf_splitting.yaml +21 -0
- data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +8 -6
- data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +7 -5
- data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +8 -6
- data/db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb +3 -3
- data/iiif_print.gemspec +1 -1
- data/lib/iiif_print/base_derivative_service.rb +13 -2
- data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +2 -2
- data/lib/iiif_print/catalog_search_builder.rb +2 -2
- data/lib/iiif_print/configuration.rb +65 -5
- data/lib/iiif_print/data/fileset_helper.rb +2 -2
- data/lib/iiif_print/data/work_derivatives.rb +1 -1
- data/lib/iiif_print/engine.rb +46 -2
- data/lib/iiif_print/homepage_search_builder.rb +2 -2
- data/lib/iiif_print/jp2_derivative_service.rb +4 -1
- data/lib/iiif_print/lineage_service.rb +19 -6
- data/lib/iiif_print/pdf_derivative_service.rb +3 -1
- data/lib/iiif_print/persistence_layer/active_fedora_adapter.rb +189 -0
- data/lib/iiif_print/persistence_layer/valkyrie_adapter.rb +183 -0
- data/lib/iiif_print/persistence_layer.rb +118 -0
- data/lib/iiif_print/split_pdfs/base_splitter.rb +11 -0
- data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +19 -9
- data/lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb +5 -16
- data/lib/iiif_print/text_extraction_derivative_service.rb +4 -2
- data/lib/iiif_print/text_formats_from_alto_service.rb +3 -1
- data/lib/iiif_print/tiff_derivative_service.rb +3 -1
- data/lib/iiif_print/version.rb +1 -1
- data/lib/iiif_print.rb +79 -44
- metadata +18 -191
- data/app/indexers/concerns/iiif_print/child_indexer.rb +0 -40
- data/app/views/hyrax/file_sets/_actions.html.erb +0 -46
- data/bin/rails +0 -13
- data/spec/.keep.txt +0 -1
- data/spec/factories/ability.rb +0 -6
- data/spec/factories/newspaper_issue.rb +0 -7
- data/spec/factories/newspaper_page.rb +0 -7
- data/spec/factories/newspaper_page_solr_document.rb +0 -20
- data/spec/factories/newspaper_title.rb +0 -8
- data/spec/factories/uploaded_pdf_file.rb +0 -9
- data/spec/factories/uploaded_txt_file.rb +0 -9
- data/spec/factories/user.rb +0 -13
- data/spec/fixtures/authorities/licenses.yml +0 -4
- data/spec/fixtures/authorities/rights_statements.yml +0 -4
- data/spec/fixtures/files/4.1.07.jp2 +0 -0
- data/spec/fixtures/files/4.1.07.tiff +0 -0
- data/spec/fixtures/files/README.md +0 -7
- data/spec/fixtures/files/alto-2-0.xsd +0 -714
- data/spec/fixtures/files/broken-truncated.pdf +0 -0
- data/spec/fixtures/files/credits.md +0 -16
- data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
- data/spec/fixtures/files/minimal-1-page.pdf +0 -0
- data/spec/fixtures/files/minimal-2-page.pdf +0 -0
- data/spec/fixtures/files/minimal-alto.xml +0 -31
- data/spec/fixtures/files/ndnp-alto-sample.xml +0 -24
- data/spec/fixtures/files/ndnp-sample1-json.json +0 -1
- data/spec/fixtures/files/ndnp-sample1-txt.txt +0 -1
- data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
- data/spec/fixtures/files/ocr_alto.xml +0 -202
- data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +0 -202
- data/spec/fixtures/files/ocr_color.tiff +0 -0
- data/spec/fixtures/files/ocr_gray.jp2 +0 -0
- data/spec/fixtures/files/ocr_gray.tiff +0 -0
- data/spec/fixtures/files/ocr_mono.tiff +0 -0
- data/spec/fixtures/files/ocr_mono_text_hocr.html +0 -78
- data/spec/fixtures/files/page1.tiff +0 -0
- data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
- data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
- data/spec/fixtures/files/thumbnail.jpg +0 -0
- data/spec/helpers/hyrax/iiif_helper_spec.rb +0 -65
- data/spec/helpers/iiif_print_helper_spec.rb +0 -43
- data/spec/iiif_print/base_derivative_service_spec.rb +0 -28
- data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +0 -59
- data/spec/iiif_print/catalog_search_builder_spec.rb +0 -60
- data/spec/iiif_print/configuration_spec.rb +0 -193
- data/spec/iiif_print/data/work_derivatives_spec.rb +0 -245
- data/spec/iiif_print/data/work_file_spec.rb +0 -99
- data/spec/iiif_print/data/work_files_spec.rb +0 -237
- data/spec/iiif_print/image_tool_spec.rb +0 -109
- data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +0 -35
- data/spec/iiif_print/jobs/create_relationships_job_spec.rb +0 -118
- data/spec/iiif_print/jp2_image_metadata_spec.rb +0 -37
- data/spec/iiif_print/lineage_service_spec.rb +0 -13
- data/spec/iiif_print/metadata_spec.rb +0 -249
- data/spec/iiif_print/split_pdfs/base_splitter_spec.rb +0 -27
- data/spec/iiif_print/split_pdfs/derivative_rodeo_splitter_spec.rb +0 -80
- data/spec/iiif_print/split_pdfs/destroy_pdf_child_works_service_spec.rb +0 -92
- data/spec/iiif_print/split_pdfs/pages_to_jpgs_splitter_spec.rb +0 -22
- data/spec/iiif_print/split_pdfs/pages_to_pngs_splitter_spec.rb +0 -18
- data/spec/iiif_print/split_pdfs/pages_to_tiffs_splitter_spec.rb +0 -19
- data/spec/iiif_print/text_extraction/alto_reader_spec.rb +0 -49
- data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +0 -45
- data/spec/iiif_print/text_extraction/page_ocr_spec.rb +0 -84
- data/spec/iiif_print/text_extraction/render_alto_spec.rb +0 -54
- data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +0 -44
- data/spec/iiif_print_spec.rb +0 -171
- data/spec/misc_shared.rb +0 -111
- data/spec/models/iiif_print/derivative_attachment_spec.rb +0 -37
- data/spec/models/iiif_print/iiif_search_decorator_spec.rb +0 -27
- data/spec/models/iiif_print/ingest_file_relation_spec.rb +0 -56
- data/spec/models/solr_document_spec.rb +0 -14
- data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +0 -70
- data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +0 -49
- data/spec/samvera/derivatives/configuration_spec.rb +0 -41
- data/spec/samvera/derivatives/hyrax_spec.rb +0 -62
- data/spec/samvera/derivatives_spec.rb +0 -54
- data/spec/services/iiif_print/derivative_rodeo_service_spec.rb +0 -103
- data/spec/services/iiif_print/jp2_derivative_service_spec.rb +0 -59
- data/spec/services/iiif_print/manifest_builder_service_behavior_spec.rb +0 -20
- data/spec/services/iiif_print/pdf_derivative_service_spec.rb +0 -66
- data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +0 -175
- data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +0 -82
- data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +0 -127
- data/spec/services/iiif_print/tiff_derivative_service_spec.rb +0 -65
- data/spec/spec_helper.rb +0 -181
- data/spec/support/controller_level_helpers.rb +0 -28
- data/spec/support/iiif_print_models.rb +0 -127
- data/spec/test_app_templates/blacklight.yml +0 -9
- data/spec/test_app_templates/fedora.yml +0 -15
- data/spec/test_app_templates/lib/generators/test_app_generator.rb +0 -40
- data/spec/test_app_templates/redis.yml +0 -9
- data/spec/test_app_templates/solr/conf/schema.xml +0 -362
- data/spec/test_app_templates/solr/conf/solrconfig.xml +0 -322
- data/spec/test_app_templates/solr.yml +0 -7
- /data/{lib → app/jobs}/iiif_print/jobs/request_split_pdf_job.rb +0 -0
Binary file
|
@@ -1,16 +0,0 @@
|
|
1
|
-
## Sample credits
|
2
|
-
|
3
|
-
* `sample-color-newsletter.pdf` — originally via:
|
4
|
-
- http://digitalpreservation.gov/news/newsletter/201306.pdf
|
5
|
-
- Re-rendered with embedded fonts via Apple Preview.app
|
6
|
-
* `ndnp-sample1.pdf` — originally via:
|
7
|
-
- https://chroniclingamerica.loc.gov/data/batches/batch_dlc_harry_ver01/data/sn82016187/00211102366/1898111001/0528.pdf
|
8
|
-
- `lowres-gray-via-ndnp-sample.tiff` based on this page image used as a
|
9
|
-
low resolution 8 bit grayscale TIFF test image in derivative testing.
|
10
|
-
- `ocr_gray.tiff`, `ocr_color.tiff`, and `ocr_gray.jp2` are all derived
|
11
|
-
from this source.
|
12
|
-
* `4.1.07.tiff` is a low-res color sample image from USC-SIPI Image Database
|
13
|
-
and is licensed as "Free to use". Via http://sipi.usc.edu/database/
|
14
|
-
- 4.1.07.jp2 is a derivative of this made via opj_compress
|
15
|
-
* `ndnp-alto-sample.xml` from NDNP Sample Content, via:
|
16
|
-
- https://chroniclingamerica.loc.gov/data/batches/batch_dlc_harry_ver01/data/sn82016187/00211102366/1898111001/0528.xml
|
Binary file
|
Binary file
|
Binary file
|
@@ -1,31 +0,0 @@
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
-
<alto xmlns="http://www.loc.gov/standards/alto/ns-v2#">
|
3
|
-
<!-- Minimal ALTO 2.0 (per NDNP 2018 guidelines) example should comply
|
4
|
-
with schema@ https://www.loc.gov/standards/alto/v2/alto-2-0.xsd
|
5
|
-
-->
|
6
|
-
<Description>
|
7
|
-
<MeasurementUnit>pixel</MeasurementUnit>
|
8
|
-
</Description>
|
9
|
-
<!-- no Styles element in this minimal ALTO -->
|
10
|
-
<Layout>
|
11
|
-
<Page ID="ID1" PHYSICAL_IMG_NR="1">
|
12
|
-
<!-- coordinates as float -->
|
13
|
-
<PrintSpace HEIGHT="" WIDTH="" HPOS="" VPOS="">
|
14
|
-
<!-- if we do not have block/line positioning, and only have
|
15
|
-
word coordinates, we end up with something a bit messy
|
16
|
-
treating the whole document as one big line of text to
|
17
|
-
simply comply with ALTO schema.
|
18
|
-
-->
|
19
|
-
<TextBlock HEIGHT="" WIDTH="" HPOS="" VPOS="">
|
20
|
-
<TextLine HEIGHT="" WIDTH="" HPOS="" VPOS="">
|
21
|
-
<String HEIGHT="" WIDTH="" HPOS="" VPOS="" CONTENT="This"></String>
|
22
|
-
<String HEIGHT="" WIDTH="" HPOS="" VPOS="" CONTENT="is"></String>
|
23
|
-
<String HEIGHT="" WIDTH="" HPOS="" VPOS="" CONTENT="only"></String>
|
24
|
-
<String HEIGHT="" WIDTH="" HPOS="" VPOS="" CONTENT="a"></String>
|
25
|
-
<String HEIGHT="" WIDTH="" HPOS="" VPOS="" CONTENT="test."></String>
|
26
|
-
</TextLine>
|
27
|
-
</TextBlock>
|
28
|
-
</PrintSpace>
|
29
|
-
</Page>
|
30
|
-
</Layout>
|
31
|
-
</alto>
|