newspaper_works 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (461) hide show
  1. checksums.yaml +7 -0
  2. data/.fcrepo_wrapper +4 -0
  3. data/.gitignore +43 -0
  4. data/.rubocop.yml +143 -0
  5. data/.solr_wrapper +8 -0
  6. data/.travis.yml +50 -0
  7. data/Gemfile +47 -0
  8. data/LICENSE +203 -0
  9. data/README.md +159 -0
  10. data/Rakefile +38 -0
  11. data/app/actors/hyrax/actors/newspaper_article_actor.rb +8 -0
  12. data/app/actors/hyrax/actors/newspaper_container_actor.rb +8 -0
  13. data/app/actors/hyrax/actors/newspaper_issue_actor.rb +8 -0
  14. data/app/actors/hyrax/actors/newspaper_page_actor.rb +8 -0
  15. data/app/actors/hyrax/actors/newspaper_title_actor.rb +8 -0
  16. data/app/actors/newspaper_works/actors/newspaper_works_upload_actor.rb +88 -0
  17. data/app/assets/config/newspaper_works_manifest.js +2 -0
  18. data/app/assets/images/newspaper_works/.keep +0 -0
  19. data/app/assets/javascripts/newspaper_works/autocomplete_fix.js +33 -0
  20. data/app/assets/javascripts/newspaper_works/ocr_search.js.erb +6 -0
  21. data/app/assets/javascripts/newspaper_works/thumbnail_highlights.js.erb +102 -0
  22. data/app/assets/javascripts/newspaper_works.js +4 -0
  23. data/app/assets/stylesheets/newspaper_works/_issue_search.scss +13 -0
  24. data/app/assets/stylesheets/newspaper_works/_issues_calendar.scss +18 -0
  25. data/app/assets/stylesheets/newspaper_works/_newspaper_works.scss +4 -0
  26. data/app/assets/stylesheets/newspaper_works/_newspapers_search.scss +38 -0
  27. data/app/assets/stylesheets/newspaper_works/_search_results.scss +12 -0
  28. data/app/controllers/hyrax/newspaper_articles_controller.rb +14 -0
  29. data/app/controllers/hyrax/newspaper_containers_controller.rb +14 -0
  30. data/app/controllers/hyrax/newspaper_issues_controller.rb +14 -0
  31. data/app/controllers/hyrax/newspaper_pages_controller.rb +14 -0
  32. data/app/controllers/hyrax/newspaper_titles_controller.rb +13 -0
  33. data/app/controllers/newspaper_works/newspapers_controller.rb +117 -0
  34. data/app/controllers/newspaper_works/newspapers_search_controller.rb +26 -0
  35. data/app/forms/hyrax/newspaper_article_form.rb +11 -0
  36. data/app/forms/hyrax/newspaper_container_form.rb +11 -0
  37. data/app/forms/hyrax/newspaper_issue_form.rb +11 -0
  38. data/app/forms/hyrax/newspaper_page_form.rb +15 -0
  39. data/app/forms/hyrax/newspaper_title_form.rb +12 -0
  40. data/app/forms/newspaper_works/newspaper_core_form_data.rb +17 -0
  41. data/app/helpers/hyrax/newspaper_articles_helper.rb +5 -0
  42. data/app/helpers/hyrax/newspaper_containers_helper.rb +5 -0
  43. data/app/helpers/hyrax/newspaper_issues_helper.rb +5 -0
  44. data/app/helpers/hyrax/newspaper_pages_helper.rb +5 -0
  45. data/app/helpers/newspaper_works/application_helper.rb +5 -0
  46. data/app/helpers/newspaper_works/breadcrumb_helper.rb +92 -0
  47. data/app/helpers/newspaper_works/newspaper_works_helper_behavior.rb +103 -0
  48. data/app/helpers/newspaper_works/newspapers_helper.rb +5 -0
  49. data/app/indexers/concerns/newspaper_works/indexes_full_text.rb +17 -0
  50. data/app/indexers/concerns/newspaper_works/indexes_place_of_publication.rb +67 -0
  51. data/app/indexers/concerns/newspaper_works/indexes_publication_date_range.rb +35 -0
  52. data/app/indexers/concerns/newspaper_works/indexes_relationships.rb +125 -0
  53. data/app/indexers/newspaper_article_indexer.rb +16 -0
  54. data/app/indexers/newspaper_container_indexer.rb +18 -0
  55. data/app/indexers/newspaper_issue_indexer.rb +26 -0
  56. data/app/indexers/newspaper_page_indexer.rb +9 -0
  57. data/app/indexers/newspaper_title_indexer.rb +19 -0
  58. data/app/indexers/newspaper_works/newspaper_core_indexer.rb +21 -0
  59. data/app/jobs/newspaper_works/application_job.rb +4 -0
  60. data/app/jobs/newspaper_works/compose_issue_pdf_job.rb +13 -0
  61. data/app/jobs/newspaper_works/create_issue_pages_job.rb +19 -0
  62. data/app/mailers/newspaper_works/application_mailer.rb +8 -0
  63. data/app/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior.rb +82 -0
  64. data/app/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior.rb +27 -0
  65. data/app/models/concerns/newspaper_works/newspaper_core_metadata.rb +67 -0
  66. data/app/models/concerns/newspaper_works/place_of_publication_behavior.rb +15 -0
  67. data/app/models/concerns/newspaper_works/scanned_media_metadata.rb +43 -0
  68. data/app/models/concerns/newspaper_works/solr/document.rb +25 -0
  69. data/app/models/file_set.rb +10 -0
  70. data/app/models/newspaper_article.rb +158 -0
  71. data/app/models/newspaper_container.rb +86 -0
  72. data/app/models/newspaper_issue.rb +115 -0
  73. data/app/models/newspaper_page.rb +70 -0
  74. data/app/models/newspaper_title.rb +111 -0
  75. data/app/models/newspaper_works/application_record.rb +6 -0
  76. data/app/models/newspaper_works/derivative_attachment.rb +8 -0
  77. data/app/models/newspaper_works/ingest_file_relation.rb +14 -0
  78. data/app/presenters/hyrax/newspaper_article_presenter.rb +38 -0
  79. data/app/presenters/hyrax/newspaper_container_presenter.rb +11 -0
  80. data/app/presenters/hyrax/newspaper_issue_presenter.rb +62 -0
  81. data/app/presenters/hyrax/newspaper_page_presenter.rb +72 -0
  82. data/app/presenters/hyrax/newspaper_title_presenter.rb +86 -0
  83. data/app/presenters/newspaper_works/iiif_manifest_presenter_behavior.rb +29 -0
  84. data/app/presenters/newspaper_works/issue_info_presenter.rb +29 -0
  85. data/app/presenters/newspaper_works/newspaper_core_presenter.rb +9 -0
  86. data/app/presenters/newspaper_works/persistent_url_presenter_behavior.rb +16 -0
  87. data/app/presenters/newspaper_works/place_of_publication_presenter_behavior.rb +8 -0
  88. data/app/presenters/newspaper_works/scanned_media_presenter.rb +7 -0
  89. data/app/presenters/newspaper_works/title_info_presenter.rb +13 -0
  90. data/app/search_builders/concerns/newspaper_works/exclude_models.rb +16 -0
  91. data/app/search_builders/concerns/newspaper_works/highlight_search_params.rb +14 -0
  92. data/app/search_builders/newspaper_works/newspapers_search_builder.rb +26 -0
  93. data/app/services/hyrax/article_genre_service.rb +9 -0
  94. data/app/services/newspaper_works/jp2_derivative_service.rb +120 -0
  95. data/app/services/newspaper_works/newspaper_page_derivative_service.rb +91 -0
  96. data/app/services/newspaper_works/pdf_derivative_service.rb +45 -0
  97. data/app/services/newspaper_works/pluggable_derivative_service.rb +114 -0
  98. data/app/services/newspaper_works/text_extraction_derivative_service.rb +56 -0
  99. data/app/services/newspaper_works/text_formats_from_alto_service.rb +77 -0
  100. data/app/services/newspaper_works/tiff_derivative_service.rb +54 -0
  101. data/app/validators/newspaper_works/publication_date_start_end_validator.rb +48 -0
  102. data/app/validators/newspaper_works/publication_date_validator.rb +16 -0
  103. data/app/views/catalog/_index_gallery_newspaper_article_wrapper.html.erb +9 -0
  104. data/app/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb +9 -0
  105. data/app/views/catalog/_index_header_gallery_newspaper_article.html.erb +23 -0
  106. data/app/views/catalog/_index_header_gallery_newspaper_page.html.erb +23 -0
  107. data/app/views/catalog/_index_header_list_newspaper_article.html.erb +7 -0
  108. data/app/views/catalog/_index_header_list_newspaper_page.html.erb +7 -0
  109. data/app/views/catalog/_snippets_more.html.erb +16 -0
  110. data/app/views/catalog/_thumbnail_list_newspaper_article.html.erb +6 -0
  111. data/app/views/catalog/_thumbnail_list_newspaper_page.html.erb +6 -0
  112. data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
  113. data/app/views/hyrax/newspaper_articles/_newspaper_article.html.erb +2 -0
  114. data/app/views/hyrax/newspaper_articles/show.html.erb +1 -0
  115. data/app/views/hyrax/newspaper_containers/_newspaper_container.html.erb +2 -0
  116. data/app/views/hyrax/newspaper_containers/show.html.erb +1 -0
  117. data/app/views/hyrax/newspaper_issues/_newspaper_issue.html.erb +2 -0
  118. data/app/views/hyrax/newspaper_issues/show.html.erb +1 -0
  119. data/app/views/hyrax/newspaper_pages/_newspaper_page.html.erb +2 -0
  120. data/app/views/hyrax/newspaper_pages/show.html.erb +1 -0
  121. data/app/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb +5 -0
  122. data/app/views/hyrax/newspaper_titles/_issue_search_form.html.erb +33 -0
  123. data/app/views/hyrax/newspaper_titles/_issues_calendar.html.erb +63 -0
  124. data/app/views/hyrax/newspaper_titles/_newspaper_title.html.erb +2 -0
  125. data/app/views/hyrax/newspaper_titles/show.html.erb +54 -0
  126. data/app/views/newspaper_works/base/_attribute_rows.html.erb +42 -0
  127. data/app/views/newspaper_works/base/_attributes.html.erb +16 -0
  128. data/app/views/newspaper_works/base/_metadata.html.erb +6 -0
  129. data/app/views/newspaper_works/base/_newspaper_hierarchy.html.erb +14 -0
  130. data/app/views/newspaper_works/base/_persistent_url.html.erb +1 -0
  131. data/app/views/newspaper_works/base/_show.html.erb +45 -0
  132. data/app/views/newspaper_works/newspapers_search/_date_fields.html.erb +29 -0
  133. data/app/views/newspaper_works/newspapers_search/_facet_layout.html.erb +8 -0
  134. data/app/views/newspaper_works/newspapers_search/_facet_limit.html.erb +17 -0
  135. data/app/views/newspaper_works/newspapers_search/_front_pages_input.html.erb +5 -0
  136. data/app/views/newspaper_works/newspapers_search/_keyword_input.html.erb +18 -0
  137. data/app/views/newspaper_works/newspapers_search/_newspapers_facets.html.erb +5 -0
  138. data/app/views/newspaper_works/newspapers_search/_newspapers_search_form.html.erb +13 -0
  139. data/app/views/newspaper_works/newspapers_search/_newspapers_search_help.html.erb +8 -0
  140. data/app/views/newspaper_works/newspapers_search/search.html.erb +13 -0
  141. data/app/views/records/edit_fields/_alternate_title.html.erb +4 -0
  142. data/app/views/records/edit_fields/_genre.html.erb +4 -0
  143. data/app/views/records/edit_fields/_place_of_publication.html.erb +14 -0
  144. data/app/views/records/edit_fields/_subtitle.html.erb +4 -0
  145. data/bin/rails +13 -0
  146. data/config/fcrepo_wrapper_test.yml +5 -0
  147. data/config/initializers/assets.rb +2 -0
  148. data/config/locales/newspaper_article.de.yml +12 -0
  149. data/config/locales/newspaper_article.en.yml +12 -0
  150. data/config/locales/newspaper_article.es.yml +12 -0
  151. data/config/locales/newspaper_article.fr.yml +12 -0
  152. data/config/locales/newspaper_article.it.yml +12 -0
  153. data/config/locales/newspaper_article.pt-BR.yml +12 -0
  154. data/config/locales/newspaper_article.zh.yml +12 -0
  155. data/config/locales/newspaper_container.de.yml +8 -0
  156. data/config/locales/newspaper_container.en.yml +8 -0
  157. data/config/locales/newspaper_container.es.yml +8 -0
  158. data/config/locales/newspaper_container.fr.yml +8 -0
  159. data/config/locales/newspaper_container.it.yml +8 -0
  160. data/config/locales/newspaper_container.pt-BR.yml +8 -0
  161. data/config/locales/newspaper_container.zh.yml +8 -0
  162. data/config/locales/newspaper_issue.de.yml +8 -0
  163. data/config/locales/newspaper_issue.en.yml +8 -0
  164. data/config/locales/newspaper_issue.es.yml +8 -0
  165. data/config/locales/newspaper_issue.fr.yml +8 -0
  166. data/config/locales/newspaper_issue.it.yml +8 -0
  167. data/config/locales/newspaper_issue.pt-BR.yml +8 -0
  168. data/config/locales/newspaper_issue.zh.yml +8 -0
  169. data/config/locales/newspaper_page.de.yml +15 -0
  170. data/config/locales/newspaper_page.en.yml +15 -0
  171. data/config/locales/newspaper_page.es.yml +15 -0
  172. data/config/locales/newspaper_page.fr.yml +15 -0
  173. data/config/locales/newspaper_page.it.yml +15 -0
  174. data/config/locales/newspaper_page.pt-BR.yml +15 -0
  175. data/config/locales/newspaper_page.zh.yml +15 -0
  176. data/config/locales/newspaper_title.de.yml +8 -0
  177. data/config/locales/newspaper_title.en.yml +8 -0
  178. data/config/locales/newspaper_title.es.yml +8 -0
  179. data/config/locales/newspaper_title.fr.yml +8 -0
  180. data/config/locales/newspaper_title.it.yml +8 -0
  181. data/config/locales/newspaper_title.pt-BR.yml +8 -0
  182. data/config/locales/newspaper_title.zh.yml +8 -0
  183. data/config/locales/newspaper_works.de.yml +50 -0
  184. data/config/locales/newspaper_works.en.yml +52 -0
  185. data/config/locales/newspaper_works.es.yml +52 -0
  186. data/config/locales/newspaper_works.fr.yml +52 -0
  187. data/config/locales/newspaper_works.it.yml +52 -0
  188. data/config/locales/newspaper_works.pt-BR.yml +52 -0
  189. data/config/locales/newspaper_works.zh.yml +52 -0
  190. data/config/routes.rb +9 -0
  191. data/config/solr_wrapper_test.yml +9 -0
  192. data/config/test-fixture/solr-config/_rest_managed.json +3 -0
  193. data/config/test-fixture/solr-config/admin-extra.html +31 -0
  194. data/config/test-fixture/solr-config/elevate.xml +36 -0
  195. data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
  196. data/config/test-fixture/solr-config/protwords.txt +21 -0
  197. data/config/test-fixture/solr-config/schema.xml +366 -0
  198. data/config/test-fixture/solr-config/scripts.conf +24 -0
  199. data/config/test-fixture/solr-config/solrconfig.xml +322 -0
  200. data/config/test-fixture/solr-config/spellings.txt +2 -0
  201. data/config/test-fixture/solr-config/stopwords.txt +58 -0
  202. data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
  203. data/config/test-fixture/solr-config/synonyms.txt +31 -0
  204. data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
  205. data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
  206. data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
  207. data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
  208. data/config/vendor/imagemagick-6-policy.xml +76 -0
  209. data/db/migrate/20181214181358_create_newspaper_works_derivative_attachments.rb +12 -0
  210. data/db/migrate/20190107165909_create_newspaper_works_ingest_file_relations.rb +11 -0
  211. data/lib/generators/newspaper_works/assets_generator.rb +29 -0
  212. data/lib/generators/newspaper_works/blacklight_advanced_search_generator.rb +44 -0
  213. data/lib/generators/newspaper_works/blacklight_iiif_search_generator.rb +41 -0
  214. data/lib/generators/newspaper_works/catalog_controller_generator.rb +60 -0
  215. data/lib/generators/newspaper_works/install_generator.rb +97 -0
  216. data/lib/generators/newspaper_works/templates/annotation_behavior.rb +6 -0
  217. data/lib/generators/newspaper_works/templates/config/authorities/newspaper_article_genres.yml +86 -0
  218. data/lib/generators/newspaper_works/templates/config/initializers/newspaper_works.rb +12 -0
  219. data/lib/generators/newspaper_works/templates/config/initializers/patch_blacklight_advanced_search.rb +74 -0
  220. data/lib/generators/newspaper_works/templates/custom_search_builder.rb +23 -0
  221. data/lib/generators/newspaper_works/templates/newspaper_works.scss +1 -0
  222. data/lib/generators/newspaper_works/templates/newspaper_works_helper.rb +3 -0
  223. data/lib/generators/newspaper_works/templates/search_behavior.rb +6 -0
  224. data/lib/newspaper_works/configuration.rb +14 -0
  225. data/lib/newspaper_works/data/fileset_helper.rb +25 -0
  226. data/lib/newspaper_works/data/path_helper.rb +40 -0
  227. data/lib/newspaper_works/data/work_derivatives.rb +314 -0
  228. data/lib/newspaper_works/data/work_file.rb +92 -0
  229. data/lib/newspaper_works/data/work_files.rb +181 -0
  230. data/lib/newspaper_works/data.rb +35 -0
  231. data/lib/newspaper_works/engine.rb +42 -0
  232. data/lib/newspaper_works/errors.rb +14 -0
  233. data/lib/newspaper_works/ingest/base_ingest.rb +69 -0
  234. data/lib/newspaper_works/ingest/base_publication_info.rb +35 -0
  235. data/lib/newspaper_works/ingest/batch_ingest_helper.rb +44 -0
  236. data/lib/newspaper_works/ingest/batch_issue_ingester.rb +129 -0
  237. data/lib/newspaper_works/ingest/chronam_publication_info.rb +133 -0
  238. data/lib/newspaper_works/ingest/from_command.rb +52 -0
  239. data/lib/newspaper_works/ingest/image_ingest_issues.rb +43 -0
  240. data/lib/newspaper_works/ingest/issue_images.rb +51 -0
  241. data/lib/newspaper_works/ingest/lc_publication_info.rb +144 -0
  242. data/lib/newspaper_works/ingest/named_issue_metadata.rb +60 -0
  243. data/lib/newspaper_works/ingest/ndnp/batch_ingester.rb +64 -0
  244. data/lib/newspaper_works/ingest/ndnp/batch_xml_ingest.rb +72 -0
  245. data/lib/newspaper_works/ingest/ndnp/container_ingest.rb +99 -0
  246. data/lib/newspaper_works/ingest/ndnp/container_ingester.rb +84 -0
  247. data/lib/newspaper_works/ingest/ndnp/container_metadata.rb +87 -0
  248. data/lib/newspaper_works/ingest/ndnp/issue_ingest.rb +81 -0
  249. data/lib/newspaper_works/ingest/ndnp/issue_ingester.rb +101 -0
  250. data/lib/newspaper_works/ingest/ndnp/issue_metadata.rb +96 -0
  251. data/lib/newspaper_works/ingest/ndnp/ndnp_asset_helper.rb +20 -0
  252. data/lib/newspaper_works/ingest/ndnp/ndnp_mets_helper.rb +70 -0
  253. data/lib/newspaper_works/ingest/ndnp/page_ingest.rb +47 -0
  254. data/lib/newspaper_works/ingest/ndnp/page_ingester.rb +157 -0
  255. data/lib/newspaper_works/ingest/ndnp/page_metadata.rb +112 -0
  256. data/lib/newspaper_works/ingest/ndnp.rb +21 -0
  257. data/lib/newspaper_works/ingest/newspaper_issue_ingest.rb +56 -0
  258. data/lib/newspaper_works/ingest/newspaper_page_ingest.rb +6 -0
  259. data/lib/newspaper_works/ingest/page_image.rb +52 -0
  260. data/lib/newspaper_works/ingest/path_enumeration.rb +52 -0
  261. data/lib/newspaper_works/ingest/pdf_images.rb +85 -0
  262. data/lib/newspaper_works/ingest/pdf_issue.rb +20 -0
  263. data/lib/newspaper_works/ingest/pdf_issues.rb +39 -0
  264. data/lib/newspaper_works/ingest/pdf_pages.rb +114 -0
  265. data/lib/newspaper_works/ingest/pub_finder.rb +89 -0
  266. data/lib/newspaper_works/ingest/publication_info.rb +44 -0
  267. data/lib/newspaper_works/ingest.rb +90 -0
  268. data/lib/newspaper_works/issue_pdf_composer.rb +111 -0
  269. data/lib/newspaper_works/logging.rb +54 -0
  270. data/lib/newspaper_works/page_finder.rb +62 -0
  271. data/lib/newspaper_works/resource_fetcher.rb +78 -0
  272. data/lib/newspaper_works/text_extraction/alto_reader.rb +122 -0
  273. data/lib/newspaper_works/text_extraction/page_ocr.rb +100 -0
  274. data/lib/newspaper_works/text_extraction/render_alto.rb +84 -0
  275. data/lib/newspaper_works/text_extraction/word_coords_builder.rb +30 -0
  276. data/lib/newspaper_works/text_extraction.rb +10 -0
  277. data/lib/newspaper_works/version.rb +3 -0
  278. data/lib/newspaper_works.rb +19 -0
  279. data/lib/tasks/newspaper_works_tasks.rake +39 -0
  280. data/newspaper_works.gemspec +49 -0
  281. data/spec/.keep.txt +1 -0
  282. data/spec/actors/newspaper_works/actors/newspaper_works_upload_actor_spec.rb +69 -0
  283. data/spec/controllers/catalog_controller_spec.rb +63 -0
  284. data/spec/controllers/newspaper_works/newspapers_controller_spec.rb +114 -0
  285. data/spec/controllers/newspaper_works/newspapers_search_controller_spec.rb +21 -0
  286. data/spec/factories/ability.rb +6 -0
  287. data/spec/factories/newspaper_issue.rb +7 -0
  288. data/spec/factories/newspaper_issue_ingest.rb +6 -0
  289. data/spec/factories/newspaper_page.rb +7 -0
  290. data/spec/factories/newspaper_page_ingest.rb +6 -0
  291. data/spec/factories/newspaper_page_solr_document.rb +12 -0
  292. data/spec/factories/newspaper_title.rb +8 -0
  293. data/spec/factories/uploaded_pdf_file.rb +9 -0
  294. data/spec/factories/user.rb +13 -0
  295. data/spec/features/front_pages_for_title_spec.rb +19 -0
  296. data/spec/features/newspaper_title_search_spec.rb +30 -0
  297. data/spec/features/newspapers_search_spec.rb +49 -0
  298. data/spec/features/search_results_thumbnail_highlights_spec.rb +33 -0
  299. data/spec/features_shared.rb +71 -0
  300. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  301. data/spec/fixtures/files/4.1.07.tiff +0 -0
  302. data/spec/fixtures/files/README.md +7 -0
  303. data/spec/fixtures/files/alto-2-0.xsd +714 -0
  304. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  305. data/spec/fixtures/files/credits.md +16 -0
  306. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  307. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  308. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  309. data/spec/fixtures/files/minimal-alto.xml +31 -0
  310. data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
  311. data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
  312. data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
  313. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  314. data/spec/fixtures/files/ocr_alto.xml +202 -0
  315. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
  316. data/spec/fixtures/files/ocr_color.tiff +0 -0
  317. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  318. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  319. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  320. data/spec/fixtures/files/page1.tiff +0 -0
  321. data/spec/fixtures/files/resource_mocks/chronam/http404-expected +0 -0
  322. data/spec/fixtures/files/resource_mocks/chronam/sn84038814.rdf +1028 -0
  323. data/spec/fixtures/files/resource_mocks/chronam/sn93059126.rdf +36 -0
  324. data/spec/fixtures/files/resource_mocks/chronam/sn94051019.rdf +37 -0
  325. data/spec/fixtures/files/resource_mocks/geonames/Chicopee +1104 -0
  326. data/spec/fixtures/files/resource_mocks/geonames/Denver +1104 -0
  327. data/spec/fixtures/files/resource_mocks/geonames/Marysville +279 -0
  328. data/spec/fixtures/files/resource_mocks/geonames/Marysville2 +279 -0
  329. data/spec/fixtures/files/resource_mocks/geonames/SLC +1104 -0
  330. data/spec/fixtures/files/resource_mocks/lccn/sn2099999999 +1 -0
  331. data/spec/fixtures/files/resource_mocks/lccn/sn82014496 +2 -0
  332. data/spec/fixtures/files/resource_mocks/lccn/sn83020109 +1 -0
  333. data/spec/fixtures/files/resource_mocks/lccn/sn83021453 +2 -0
  334. data/spec/fixtures/files/resource_mocks/lccn/sn83045396 +2 -0
  335. data/spec/fixtures/files/resource_mocks/lccn/sn84038814 +2 -0
  336. data/spec/fixtures/files/resource_mocks/lccn/sn93059126 +1 -0
  337. data/spec/fixtures/files/resource_mocks/lccn/sn94051019 +1 -0
  338. data/spec/fixtures/files/resource_mocks/lccn/sn99999999 +1 -0
  339. data/spec/fixtures/files/resource_mocks/urls.json +82 -0
  340. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  341. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  342. data/spec/fixtures/files/thumbnail.jpg +0 -0
  343. data/spec/forms/hyrax/newspaper_article_form_spec.rb +33 -0
  344. data/spec/forms/hyrax/newspaper_container_form_spec.rb +30 -0
  345. data/spec/forms/hyrax/newspaper_issue_form_spec.rb +31 -0
  346. data/spec/forms/hyrax/newspaper_page_form_spec.rb +28 -0
  347. data/spec/forms/hyrax/newspaper_title_form_spec.rb +31 -0
  348. data/spec/forms/newspaper_works/newspaper_core_form_data_spec.rb +12 -0
  349. data/spec/helpers/newspaper_works/breadcrumb_helper_spec.rb +82 -0
  350. data/spec/helpers/newspaper_works_helper_spec.rb +57 -0
  351. data/spec/indexers/concerns/newspaper_works/indexes_full_text_spec.rb +31 -0
  352. data/spec/indexers/concerns/newspaper_works/indexes_place_of_publication_spec.rb +53 -0
  353. data/spec/indexers/concerns/newspaper_works/indexes_publication_date_range_spec.rb +39 -0
  354. data/spec/indexers/concerns/newspaper_works/indexes_relationships_spec.rb +86 -0
  355. data/spec/indexers/newspaper_article_indexer_spec.rb +29 -0
  356. data/spec/indexers/newspaper_issue_indexer_spec.rb +19 -0
  357. data/spec/indexers/newspaper_title_indexer_spec.rb +22 -0
  358. data/spec/indexers/newspaper_works/newspaper_core_indexer_spec.rb +23 -0
  359. data/spec/lib/newspaper_works/configuration_spec.rb +18 -0
  360. data/spec/lib/newspaper_works/data/work_derivatives_spec.rb +245 -0
  361. data/spec/lib/newspaper_works/data/work_file_spec.rb +99 -0
  362. data/spec/lib/newspaper_works/data/work_files_spec.rb +224 -0
  363. data/spec/lib/newspaper_works/ingest/batch_issue_ingester_spec.rb +158 -0
  364. data/spec/lib/newspaper_works/ingest/chronam_publication_info_spec.rb +35 -0
  365. data/spec/lib/newspaper_works/ingest/from_command_spec.rb +75 -0
  366. data/spec/lib/newspaper_works/ingest/image_ingest_issues_spec.rb +62 -0
  367. data/spec/lib/newspaper_works/ingest/ingest_shared.rb +75 -0
  368. data/spec/lib/newspaper_works/ingest/issue_images_spec.rb +65 -0
  369. data/spec/lib/newspaper_works/ingest/lc_publication_info_spec.rb +34 -0
  370. data/spec/lib/newspaper_works/ingest/ndnp/batch_ingester_spec.rb +131 -0
  371. data/spec/lib/newspaper_works/ingest/ndnp/batch_xml_ingest_spec.rb +64 -0
  372. data/spec/lib/newspaper_works/ingest/ndnp/container_ingest_spec.rb +44 -0
  373. data/spec/lib/newspaper_works/ingest/ndnp/container_ingester_spec.rb +126 -0
  374. data/spec/lib/newspaper_works/ingest/ndnp/container_metadata_spec.rb +36 -0
  375. data/spec/lib/newspaper_works/ingest/ndnp/issue_ingest_spec.rb +108 -0
  376. data/spec/lib/newspaper_works/ingest/ndnp/issue_ingester_spec.rb +155 -0
  377. data/spec/lib/newspaper_works/ingest/ndnp/issue_metadata_spec.rb +84 -0
  378. data/spec/lib/newspaper_works/ingest/ndnp/page_ingest_spec.rb +79 -0
  379. data/spec/lib/newspaper_works/ingest/ndnp/page_ingester_spec.rb +184 -0
  380. data/spec/lib/newspaper_works/ingest/ndnp/page_metadata_spec.rb +85 -0
  381. data/spec/lib/newspaper_works/ingest/newspaper_issue_ingest_spec.rb +83 -0
  382. data/spec/lib/newspaper_works/ingest/newspaper_page_ingest_spec.rb +77 -0
  383. data/spec/lib/newspaper_works/ingest/page_image_spec.rb +29 -0
  384. data/spec/lib/newspaper_works/ingest/pdf_images_spec.rb +32 -0
  385. data/spec/lib/newspaper_works/ingest/pdf_issue_spec.rb +29 -0
  386. data/spec/lib/newspaper_works/ingest/pdf_issues_spec.rb +62 -0
  387. data/spec/lib/newspaper_works/ingest/pdf_pages_spec.rb +110 -0
  388. data/spec/lib/newspaper_works/ingest/pub_finder_spec.rb +58 -0
  389. data/spec/lib/newspaper_works/ingest/publication_info_spec.rb +61 -0
  390. data/spec/lib/newspaper_works/ingest_spec.rb +45 -0
  391. data/spec/lib/newspaper_works/issue_pdf_composer_spec.rb +101 -0
  392. data/spec/lib/newspaper_works/logging_spec.rb +53 -0
  393. data/spec/lib/newspaper_works/page_finder_spec.rb +53 -0
  394. data/spec/lib/newspaper_works/resource_fetcher_spec.rb +65 -0
  395. data/spec/lib/newspaper_works/text_extraction/alto_reader_spec.rb +49 -0
  396. data/spec/lib/newspaper_works/text_extraction/page_ocr_spec.rb +84 -0
  397. data/spec/lib/newspaper_works/text_extraction/render_alto_spec.rb +54 -0
  398. data/spec/lib/newspaper_works/text_extraction/word_coords_builder_spec.rb +30 -0
  399. data/spec/lib/tasks/newspaper_works_rake_spec.rb +124 -0
  400. data/spec/misc_shared.rb +109 -0
  401. data/spec/model_shared.rb +134 -0
  402. data/spec/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior_spec.rb +45 -0
  403. data/spec/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior_spec.rb +27 -0
  404. data/spec/models/concerns/newspaper_works/newspaper_core_metadata_spec.rb +45 -0
  405. data/spec/models/concerns/newspaper_works/place_of_publication_behavior_spec.rb +17 -0
  406. data/spec/models/concerns/newspaper_works/scanned_media_metadata_spec.rb +35 -0
  407. data/spec/models/newspaper_article_spec.rb +73 -0
  408. data/spec/models/newspaper_container_spec.rb +111 -0
  409. data/spec/models/newspaper_issue_spec.rb +91 -0
  410. data/spec/models/newspaper_page_spec.rb +44 -0
  411. data/spec/models/newspaper_title_spec.rb +116 -0
  412. data/spec/models/newspaper_works/derivative_attachment_spec.rb +37 -0
  413. data/spec/models/newspaper_works/ingest_file_relation_spec.rb +56 -0
  414. data/spec/models/solr_document_spec.rb +14 -0
  415. data/spec/ndnp_shared.rb +48 -0
  416. data/spec/presenters/hyrax/newspaper_article_presenter_spec.rb +53 -0
  417. data/spec/presenters/hyrax/newspaper_container_presenter_spec.rb +20 -0
  418. data/spec/presenters/hyrax/newspaper_issue_presenter_spec.rb +65 -0
  419. data/spec/presenters/hyrax/newspaper_page_presenter_spec.rb +75 -0
  420. data/spec/presenters/hyrax/newspaper_title_presenter_spec.rb +153 -0
  421. data/spec/presenters/newspaper_works/iiif_manifest_presenter_behavior_spec.rb +32 -0
  422. data/spec/presenters/newspaper_works/issue_info_presenter_spec.rb +51 -0
  423. data/spec/presenters/newspaper_works/newspaper_core_presenter_spec.rb +22 -0
  424. data/spec/presenters/newspaper_works/persistent_url_presenter_behavior_spec.rb +24 -0
  425. data/spec/presenters/newspaper_works/place_of_publication_presenter_behavior_spec.rb +17 -0
  426. data/spec/presenters/newspaper_works/scanned_media_presenter_spec.rb +18 -0
  427. data/spec/presenters/newspaper_works/title_info_presenter_spec.rb +23 -0
  428. data/spec/routing/route_spec.rb +52 -0
  429. data/spec/search_builders/custom_search_builder_spec.rb +34 -0
  430. data/spec/search_builders/newspaper_works/newspapers_search_builder_spec.rb +33 -0
  431. data/spec/services/hyrax/article_genre_service_spec.rb +12 -0
  432. data/spec/services/hyrax/resource_types_service_spec.rb +12 -0
  433. data/spec/services/newspaper_works/jp2_derivative_service_spec.rb +62 -0
  434. data/spec/services/newspaper_works/newspaper_page_derivative_service_spec.rb +125 -0
  435. data/spec/services/newspaper_works/pdf_derivative_service_spec.rb +62 -0
  436. data/spec/services/newspaper_works/pluggable_derivative_service_spec.rb +204 -0
  437. data/spec/services/newspaper_works/text_extraction_derivative_service_spec.rb +82 -0
  438. data/spec/services/newspaper_works/text_formats_from_alto_service_spec.rb +129 -0
  439. data/spec/services/newspaper_works/tiff_derivative_service_spec.rb +58 -0
  440. data/spec/spec_helper.rb +261 -0
  441. data/spec/support/controller_level_helpers.rb +28 -0
  442. data/spec/test_app_templates/lib/generators/test_app_generator.rb +22 -0
  443. data/spec/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb_spec.rb +36 -0
  444. data/spec/views/catalog/_index_header_list_newspaper_page.html.erb_spec.rb +26 -0
  445. data/spec/views/catalog/_thumbnail_list_newspaper_page.html.erb_spec.rb +35 -0
  446. data/spec/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb_spec.rb +16 -0
  447. data/spec/views/hyrax/newspaper_titles/_issue_search_form.html.erb_spec.rb +33 -0
  448. data/spec/views/hyrax/newspaper_titles/_issues_calendar.html.erb_spec.rb +37 -0
  449. data/spec/views/hyrax/newspaper_titles/show.html.erb_spec.rb +87 -0
  450. data/spec/views/newspaper_works/base/_attribute_rows.html.erb_spec.rb +60 -0
  451. data/spec/views/newspaper_works/base/_newspaper_hierarchy.html.erb_spec.rb +80 -0
  452. data/spec/views/newspaper_works/base/_show.html.erb_spec.rb +78 -0
  453. data/spec/views/newspaper_works/newspapers_search/search.html.erb_spec.rb +54 -0
  454. data/spec/views/records/edit_fields/_place_of_publication.html.erb_spec.rb +26 -0
  455. data/tasks/newspaperworks_dev.rake +26 -0
  456. data/test/integration/navigation_test.rb +7 -0
  457. data/test/lib/generators/newspaper_works/install_generator_test.rb +16 -0
  458. data/test/newspaper_works_test.rb +7 -0
  459. data/test/test_helper.rb +17 -0
  460. data/tmp/.keep +0 -0
  461. metadata +1037 -0
@@ -0,0 +1,322 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?>
2
+ <!--
3
+ Licensed to the Apache Software Foundation (ASF) under one or more
4
+ contributor license agreements. See the NOTICE file distributed with
5
+ this work for additional information regarding copyright ownership.
6
+ The ASF licenses this file to You under the Apache License, Version 2.0
7
+ (the "License"); you may not use this file except in compliance with
8
+ the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+ -->
18
+
19
+ <!--
20
+ This is a stripped down config file used for a simple example...
21
+ It is *not* a good example to work from.
22
+ -->
23
+ <config>
24
+
25
+ <!-- Controls what version of Lucene various components of Solr
26
+ adhere to. Generally, you want to use the latest version to
27
+ get all bug fixes and improvements. It is highly recommended
28
+ that you fully re-index after changing this setting as it can
29
+ affect both how text is indexed and queried.
30
+ -->
31
+ <luceneMatchVersion>5.0.0</luceneMatchVersion>
32
+
33
+ <lib dir="${solr.install.dir:../../../..}/contrib/analysis-extras/lib" />
34
+ <lib dir="${solr.install.dir:../../../..}/contrib/analysis-extras/lucene-libs" />
35
+ <lib dir="${solr.install.dir:../../../..}/contrib/extraction/lib" regex=".*\.jar" />
36
+ <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-cell-\d.*\.jar" />
37
+
38
+ <directoryFactory name="DirectoryFactory"
39
+ class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}">
40
+ </directoryFactory>
41
+
42
+ <codecFactory class="solr.SchemaCodecFactory"/>
43
+
44
+ <schemaFactory class="ClassicIndexSchemaFactory"/>
45
+
46
+
47
+ <dataDir>${solr.blacklight-core.data.dir:}</dataDir>
48
+
49
+ <requestDispatcher handleSelect="true" >
50
+ <requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048000" />
51
+ </requestDispatcher>
52
+
53
+ <requestHandler name="/analysis/field" startup="lazy" class="solr.FieldAnalysisRequestHandler" />
54
+
55
+ <!-- config for the admin interface -->
56
+ <admin>
57
+ <defaultQuery>*:*</defaultQuery>
58
+ </admin>
59
+
60
+ <updateHandler class="solr.DirectUpdateHandler2">
61
+ <updateLog>
62
+ <str name="dir">${solr.ulog.dir:}</str>
63
+ </updateLog>
64
+
65
+ <autoCommit>
66
+ <maxTime>${solr.autoCommit.maxTime:15000}</maxTime>
67
+ <openSearcher>false</openSearcher>
68
+ </autoCommit>
69
+
70
+ <autoSoftCommit>
71
+ <maxTime>${solr.autoSoftCommit.maxTime:-1}</maxTime>
72
+ </autoSoftCommit>
73
+ </updateHandler>
74
+
75
+ <!-- SearchHandler
76
+
77
+ http://wiki.apache.org/solr/SearchHandler
78
+
79
+ For processing Search Queries, the primary Request Handler
80
+ provided with Solr is "SearchHandler" It delegates to a sequent
81
+ of SearchComponents (see below) and supports distributed
82
+ queries across multiple shards
83
+ -->
84
+ <requestHandler name="search" class="solr.SearchHandler" default="true">
85
+ <!-- default values for query parameters can be specified, these
86
+ will be overridden by parameters in the request
87
+ -->
88
+ <lst name="defaults">
89
+ <str name="defType">edismax</str>
90
+ <str name="echoParams">explicit</str>
91
+ <str name="q.alt">*:*</str>
92
+ <str name="mm">2&lt;-1 5&lt;-2 6&lt;90%</str>
93
+ <int name="qs">1</int>
94
+ <int name="ps">2</int>
95
+ <float name="tie">0.01</float>
96
+ <!-- this qf and pf are used by default, if not otherwise specified by
97
+ client. The default blacklight_config will use these for the
98
+ "keywords" search. See the author_qf/author_pf, title_qf, etc
99
+ below, which the default blacklight_config will specify for
100
+ those searches. You may also be interested in:
101
+ http://wiki.apache.org/solr/LocalParams
102
+ -->
103
+ <str name="qf">
104
+ id
105
+ title_tesim
106
+ author_tesim
107
+ subject_tesim
108
+ </str>
109
+ <str name="pf">
110
+ all_text_timv^10
111
+ </str>
112
+
113
+ <str name="author_qf">
114
+ author_tesim
115
+ </str>
116
+ <str name="author_pf">
117
+ </str>
118
+ <str name="title_qf">
119
+ title_tesim
120
+ </str>
121
+ <str name="title_pf">
122
+ </str>
123
+ <str name="subject_qf">
124
+ subject_tesim
125
+ </str>
126
+ <str name="subject_pf">
127
+ </str>
128
+
129
+ <str name="fl">
130
+ *,
131
+ score
132
+ </str>
133
+
134
+ <str name="facet">true</str>
135
+ <str name="facet.mincount">1</str>
136
+
137
+ <str name="spellcheck">true</str>
138
+ <str name="spellcheck.dictionary">default</str>
139
+ <str name="spellcheck.onlyMorePopular">true</str>
140
+ <str name="spellcheck.extendedResults">true</str>
141
+ <str name="spellcheck.collate">false</str>
142
+ <str name="spellcheck.count">5</str>
143
+
144
+ </lst>
145
+ <arr name="last-components">
146
+ <str>spellcheck</str>
147
+ </arr>
148
+ </requestHandler>
149
+
150
+ <requestHandler name="permissions" class="solr.SearchHandler" >
151
+ <lst name="defaults">
152
+ <str name="facet">off</str>
153
+ <str name="echoParams">all</str>
154
+ <str name="rows">1</str>
155
+ <str name="q">{!raw f=id v=$id}</str> <!-- use id=666 instead of q=id:666 -->
156
+ <str name="fl">
157
+ id,
158
+ access_ssim,
159
+ discover_access_group_ssim,discover_access_person_ssim,
160
+ read_access_group_ssim,read_access_person_ssim,
161
+ edit_access_group_ssim,edit_access_person_ssim,
162
+ depositor_ti,
163
+ embargo_release_date_dtsi
164
+ inheritable_access_ssim,
165
+ inheritable_discover_access_group_ssim,inheritable_discover_access_person_ssim,
166
+ inheritable_read_access_group_ssim,inheritable_read_access_person_ssim,
167
+ inheritable_edit_access_group_ssim,inheritable_edit_access_person_ssim,
168
+ inheritable_embargo_release_date_dtsi
169
+ </str>
170
+ </lst>
171
+ </requestHandler>
172
+
173
+ <requestHandler name="standard" class="solr.SearchHandler">
174
+ <lst name="defaults">
175
+ <str name="echoParams">explicit</str>
176
+ <str name="defType">lucene</str>
177
+ </lst>
178
+ </requestHandler>
179
+
180
+ <!-- for requests to get a single document; use id=666 instead of q=id:666 -->
181
+ <requestHandler name="document" class="solr.SearchHandler" >
182
+ <lst name="defaults">
183
+ <str name="echoParams">all</str>
184
+ <str name="fl">*</str>
185
+ <str name="rows">1</str>
186
+ <str name="q">{!term f=id v=$id}</str> <!-- use id=666 instead of q=id:666 -->
187
+ </lst>
188
+ </requestHandler>
189
+
190
+ <searchComponent name="termsComponent" class="solr.TermsComponent" />
191
+
192
+ <requestHandler name="/terms" class="solr.SearchHandler">
193
+ <lst name="defaults">
194
+ <bool name="terms">true</bool>
195
+ </lst>
196
+ <arr name="components">
197
+ <str>termsComponent</str>
198
+ </arr>
199
+ </requestHandler>
200
+
201
+ <!-- Spell Check
202
+
203
+ The spell check component can return a list of alternative spelling
204
+ suggestions.
205
+
206
+ http://wiki.apache.org/solr/SpellCheckComponent
207
+ -->
208
+ <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
209
+
210
+ <str name="queryAnalyzerFieldType">textSpell</str>
211
+
212
+ <!-- Multiple "Spell Checkers" can be declared and used by this
213
+ component
214
+ -->
215
+
216
+ <!-- a spellchecker built from a field of the main index, and
217
+ written to disk
218
+ -->
219
+ <lst name="spellchecker">
220
+ <str name="name">default</str>
221
+ <str name="field">spell</str>
222
+ <str name="spellcheckIndexDir">./spell</str>
223
+ <str name="buildOnOptimize">true</str>
224
+ </lst>
225
+ <lst name="spellchecker">
226
+ <str name="name">author</str>
227
+ <str name="field">author_spell</str>
228
+ <str name="spellcheckIndexDir">./spell_author</str>
229
+ <str name="accuracy">0.7</str>
230
+ <str name="buildOnOptimize">true</str>
231
+ </lst>
232
+ <lst name="spellchecker">
233
+ <str name="name">subject</str>
234
+ <str name="field">subject_spell</str>
235
+ <str name="spellcheckIndexDir">./spell_subject</str>
236
+ <str name="accuracy">0.7</str>
237
+ <str name="buildOnOptimize">true</str>
238
+ </lst>
239
+ <lst name="spellchecker">
240
+ <str name="name">title</str>
241
+ <str name="field">title_spell</str>
242
+ <str name="spellcheckIndexDir">./spell_title</str>
243
+ <str name="accuracy">0.7</str>
244
+ <str name="buildOnOptimize">true</str>
245
+ </lst>
246
+
247
+ <!-- a spellchecker that uses a different distance measure -->
248
+ <!--
249
+ <lst name="spellchecker">
250
+ <str name="name">jarowinkler</str>
251
+ <str name="field">spell</str>
252
+ <str name="distanceMeasure">
253
+ org.apache.lucene.search.spell.JaroWinklerDistance
254
+ </str>
255
+ <str name="spellcheckIndexDir">spellcheckerJaro</str>
256
+ </lst>
257
+ -->
258
+
259
+ <!-- a spellchecker that use an alternate comparator
260
+
261
+ comparatorClass be one of:
262
+ 1. score (default)
263
+ 2. freq (Frequency first, then score)
264
+ 3. A fully qualified class name
265
+ -->
266
+ <!--
267
+ <lst name="spellchecker">
268
+ <str name="name">freq</str>
269
+ <str name="field">lowerfilt</str>
270
+ <str name="spellcheckIndexDir">spellcheckerFreq</str>
271
+ <str name="comparatorClass">freq</str>
272
+ <str name="buildOnCommit">true</str>
273
+ -->
274
+
275
+ <!-- A spellchecker that reads the list of words from a file -->
276
+ <!--
277
+ <lst name="spellchecker">
278
+ <str name="classname">solr.FileBasedSpellChecker</str>
279
+ <str name="name">file</str>
280
+ <str name="sourceLocation">spellings.txt</str>
281
+ <str name="characterEncoding">UTF-8</str>
282
+ <str name="spellcheckIndexDir">spellcheckerFile</str>
283
+ </lst>
284
+ -->
285
+ </searchComponent>
286
+
287
+ <searchComponent name="suggest" class="solr.SuggestComponent">
288
+ <lst name="suggester">
289
+ <str name="name">mySuggester</str>
290
+ <str name="lookupImpl">FuzzyLookupFactory</str>
291
+ <str name="suggestAnalyzerFieldType">textSuggest</str>
292
+ <str name="buildOnCommit">true</str>
293
+ <str name="field">suggest</str>
294
+ </lst>
295
+ </searchComponent>
296
+
297
+ <requestHandler name="/suggest" class="solr.SearchHandler" startup="lazy">
298
+ <lst name="defaults">
299
+ <str name="suggest">true</str>
300
+ <str name="suggest.count">5</str>
301
+ <str name="suggest.dictionary">mySuggester</str>
302
+ </lst>
303
+ <arr name="components">
304
+ <str>suggest</str>
305
+ </arr>
306
+ </requestHandler>
307
+
308
+ <requestHandler name="/update/extract" class="org.apache.solr.handler.extraction.ExtractingRequestHandler">
309
+ <lst name="defaults">
310
+ <str name="fmap.Last-Modified">last_modified</str>
311
+ <str name="uprefix">ignored_</str>
312
+ </lst>
313
+ <!--Optional. Specify a path to a tika configuration file. See the Tika docs for details.-->
314
+ <!-- <str name="tika.config">/my/path/to/tika.config</str> -->
315
+ <!-- Optional. Specify one or more date formats to parse. See DateUtil.DEFAULT_DATE_FORMATS
316
+ for default date formats -->
317
+ <!-- <lst name="date.formats"> -->
318
+ <!-- <str>yyyy&#45;MM&#45;dd</str> -->
319
+ <!-- </lst> -->
320
+ </requestHandler>
321
+ </config>
322
+
@@ -0,0 +1,2 @@
1
+ pizza
2
+ history
@@ -0,0 +1,58 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one or more
2
+ # contributor license agreements. See the NOTICE file distributed with
3
+ # this work for additional information regarding copyright ownership.
4
+ # The ASF licenses this file to You under the Apache License, Version 2.0
5
+ # (the "License"); you may not use this file except in compliance with
6
+ # the License. You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ #-----------------------------------------------------------------------
17
+ # a couple of test stopwords to test that the words are really being
18
+ # configured from this file:
19
+ stopworda
20
+ stopwordb
21
+
22
+ #Standard english stop words taken from Lucene's StopAnalyzer
23
+ a
24
+ an
25
+ and
26
+ are
27
+ as
28
+ at
29
+ be
30
+ but
31
+ by
32
+ for
33
+ if
34
+ in
35
+ into
36
+ is
37
+ it
38
+ no
39
+ not
40
+ of
41
+ on
42
+ or
43
+ s
44
+ such
45
+ t
46
+ that
47
+ the
48
+ their
49
+ then
50
+ there
51
+ these
52
+ they
53
+ this
54
+ to
55
+ was
56
+ will
57
+ with
58
+
@@ -0,0 +1,58 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one or more
2
+ # contributor license agreements. See the NOTICE file distributed with
3
+ # this work for additional information regarding copyright ownership.
4
+ # The ASF licenses this file to You under the Apache License, Version 2.0
5
+ # (the "License"); you may not use this file except in compliance with
6
+ # the License. You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ #-----------------------------------------------------------------------
17
+ # a couple of test stopwords to test that the words are really being
18
+ # configured from this file:
19
+ stopworda
20
+ stopwordb
21
+
22
+ #Standard english stop words taken from Lucene's StopAnalyzer
23
+ a
24
+ an
25
+ and
26
+ are
27
+ as
28
+ at
29
+ be
30
+ but
31
+ by
32
+ for
33
+ if
34
+ in
35
+ into
36
+ is
37
+ it
38
+ no
39
+ not
40
+ of
41
+ on
42
+ or
43
+ s
44
+ such
45
+ t
46
+ that
47
+ the
48
+ their
49
+ then
50
+ there
51
+ these
52
+ they
53
+ this
54
+ to
55
+ was
56
+ will
57
+ with
58
+
@@ -0,0 +1,31 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ #-----------------------------------------------------------------------
14
+ #some test synonym mappings unlikely to appear in real input text
15
+ aaa => aaaa
16
+ bbb => bbbb1 bbbb2
17
+ ccc => cccc1,cccc2
18
+ a\=>a => b\=>b
19
+ a\,a => b\,b
20
+ fooaaa,baraaa,bazaaa
21
+
22
+ # Some synonym groups specific to this example
23
+ GB,gib,gigabyte,gigabytes
24
+ MB,mib,megabyte,megabytes
25
+ Television, Televisions, TV, TVs
26
+ #notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
27
+ #after us won't split it into two words.
28
+
29
+ # Synonym mappings can be used for spelling correction too
30
+ pixima => pixma
31
+
@@ -0,0 +1,132 @@
1
+ <?xml version='1.0' encoding='UTF-8'?>
2
+
3
+ <!--
4
+ * Licensed to the Apache Software Foundation (ASF) under one or more
5
+ * contributor license agreements. See the NOTICE file distributed with
6
+ * this work for additional information regarding copyright ownership.
7
+ * The ASF licenses this file to You under the Apache License, Version 2.0
8
+ * (the "License"); you may not use this file except in compliance with
9
+ * the License. You may obtain a copy of the License at
10
+ *
11
+ * http://www.apache.org/licenses/LICENSE-2.0
12
+ *
13
+ * Unless required by applicable law or agreed to in writing, software
14
+ * distributed under the License is distributed on an "AS IS" BASIS,
15
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ * See the License for the specific language governing permissions and
17
+ * limitations under the License.
18
+ -->
19
+
20
+ <!--
21
+ Simple transform of Solr query results to HTML
22
+ -->
23
+ <xsl:stylesheet version='1.0'
24
+ xmlns:xsl='http://www.w3.org/1999/XSL/Transform'
25
+ >
26
+
27
+ <xsl:output media-type="text/html; charset=UTF-8" encoding="UTF-8"/>
28
+
29
+ <xsl:variable name="title" select="concat('Solr search results (',response/result/@numFound,' documents)')"/>
30
+
31
+ <xsl:template match='/'>
32
+ <html>
33
+ <head>
34
+ <title><xsl:value-of select="$title"/></title>
35
+ <xsl:call-template name="css"/>
36
+ </head>
37
+ <body>
38
+ <h1><xsl:value-of select="$title"/></h1>
39
+ <div class="note">
40
+ This has been formatted by the sample "example.xsl" transform -
41
+ use your own XSLT to get a nicer page
42
+ </div>
43
+ <xsl:apply-templates select="response/result/doc"/>
44
+ </body>
45
+ </html>
46
+ </xsl:template>
47
+
48
+ <xsl:template match="doc">
49
+ <xsl:variable name="pos" select="position()"/>
50
+ <div class="doc">
51
+ <table width="100%">
52
+ <xsl:apply-templates>
53
+ <xsl:with-param name="pos"><xsl:value-of select="$pos"/></xsl:with-param>
54
+ </xsl:apply-templates>
55
+ </table>
56
+ </div>
57
+ </xsl:template>
58
+
59
+ <xsl:template match="doc/*[@name='score']" priority="100">
60
+ <xsl:param name="pos"></xsl:param>
61
+ <tr>
62
+ <td class="name">
63
+ <xsl:value-of select="@name"/>
64
+ </td>
65
+ <td class="value">
66
+ <xsl:value-of select="."/>
67
+
68
+ <xsl:if test="boolean(//lst[@name='explain'])">
69
+ <xsl:element name="a">
70
+ <!-- can't allow whitespace here -->
71
+ <xsl:attribute name="href">javascript:toggle("<xsl:value-of select="concat('exp-',$pos)" />");</xsl:attribute>?</xsl:element>
72
+ <br/>
73
+ <xsl:element name="div">
74
+ <xsl:attribute name="class">exp</xsl:attribute>
75
+ <xsl:attribute name="id">
76
+ <xsl:value-of select="concat('exp-',$pos)" />
77
+ </xsl:attribute>
78
+ <xsl:value-of select="//lst[@name='explain']/str[position()=$pos]"/>
79
+ </xsl:element>
80
+ </xsl:if>
81
+ </td>
82
+ </tr>
83
+ </xsl:template>
84
+
85
+ <xsl:template match="doc/arr" priority="100">
86
+ <tr>
87
+ <td class="name">
88
+ <xsl:value-of select="@name"/>
89
+ </td>
90
+ <td class="value">
91
+ <ul>
92
+ <xsl:for-each select="*">
93
+ <li><xsl:value-of select="."/></li>
94
+ </xsl:for-each>
95
+ </ul>
96
+ </td>
97
+ </tr>
98
+ </xsl:template>
99
+
100
+
101
+ <xsl:template match="doc/*">
102
+ <tr>
103
+ <td class="name">
104
+ <xsl:value-of select="@name"/>
105
+ </td>
106
+ <td class="value">
107
+ <xsl:value-of select="."/>
108
+ </td>
109
+ </tr>
110
+ </xsl:template>
111
+
112
+ <xsl:template match="*"/>
113
+
114
+ <xsl:template name="css">
115
+ <script>
116
+ function toggle(id) {
117
+ var obj = document.getElementById(id);
118
+ obj.style.display = (obj.style.display != 'block') ? 'block' : 'none';
119
+ }
120
+ </script>
121
+ <style type="text/css">
122
+ body { font-family: "Lucida Grande", sans-serif }
123
+ td.name { font-style: italic; font-size:80%; }
124
+ td { vertical-align: top; }
125
+ ul { margin: 0px; margin-left: 1em; padding: 0px; }
126
+ .note { font-size:80%; }
127
+ .doc { margin-top: 1em; border-top: solid grey 1px; }
128
+ .exp { display: none; font-family: monospace; white-space: pre; }
129
+ </style>
130
+ </xsl:template>
131
+
132
+ </xsl:stylesheet>
@@ -0,0 +1,67 @@
1
+ <?xml version='1.0' encoding='UTF-8'?>
2
+
3
+ <!--
4
+ * Licensed to the Apache Software Foundation (ASF) under one or more
5
+ * contributor license agreements. See the NOTICE file distributed with
6
+ * this work for additional information regarding copyright ownership.
7
+ * The ASF licenses this file to You under the Apache License, Version 2.0
8
+ * (the "License"); you may not use this file except in compliance with
9
+ * the License. You may obtain a copy of the License at
10
+ *
11
+ * http://www.apache.org/licenses/LICENSE-2.0
12
+ *
13
+ * Unless required by applicable law or agreed to in writing, software
14
+ * distributed under the License is distributed on an "AS IS" BASIS,
15
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ * See the License for the specific language governing permissions and
17
+ * limitations under the License.
18
+ -->
19
+
20
+ <!--
21
+ Simple transform of Solr query results to Atom
22
+ -->
23
+
24
+ <xsl:stylesheet version='1.0'
25
+ xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>
26
+
27
+ <xsl:output
28
+ method="xml"
29
+ encoding="utf-8"
30
+ media-type="text/xml; charset=UTF-8"
31
+ />
32
+
33
+ <xsl:template match='/'>
34
+ <xsl:variable name="query" select="response/lst[@name='responseHeader']/lst[@name='params']/str[@name='q']"/>
35
+ <feed xmlns="http://www.w3.org/2005/Atom">
36
+ <title>Example Solr Atom 1.0 Feed</title>
37
+ <subtitle>
38
+ This has been formatted by the sample "example_atom.xsl" transform -
39
+ use your own XSLT to get a nicer Atom feed.
40
+ </subtitle>
41
+ <author>
42
+ <name>Apache Solr</name>
43
+ <email>solr-user@lucene.apache.org</email>
44
+ </author>
45
+ <link rel="self" type="application/atom+xml"
46
+ href="http://localhost:8983/solr/q={$query}&amp;wt=xslt&amp;tr=atom.xsl"/>
47
+ <updated>
48
+ <xsl:value-of select="response/result/doc[position()=1]/date[@name='timestamp']"/>
49
+ </updated>
50
+ <id>tag:localhost,2007:example</id>
51
+ <xsl:apply-templates select="response/result/doc"/>
52
+ </feed>
53
+ </xsl:template>
54
+
55
+ <!-- search results xslt -->
56
+ <xsl:template match="doc">
57
+ <xsl:variable name="id" select="str[@name='id']"/>
58
+ <entry>
59
+ <title><xsl:value-of select="str[@name='name']"/></title>
60
+ <link href="http://localhost:8983/solr/select?q={$id}"/>
61
+ <id>tag:localhost,2007:<xsl:value-of select="$id"/></id>
62
+ <summary><xsl:value-of select="arr[@name='features']"/></summary>
63
+ <updated><xsl:value-of select="date[@name='timestamp']"/></updated>
64
+ </entry>
65
+ </xsl:template>
66
+
67
+ </xsl:stylesheet>