iiif_print 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.env +5 -0
  4. data/.fcrepo_wrapper +4 -0
  5. data/.github/release.yml +20 -0
  6. data/.github/workflows/branches.yml +24 -0
  7. data/.github/workflows/build-lint-test-action.yaml +33 -0
  8. data/.github/workflows/release_labels.yml +25 -0
  9. data/.gitignore +52 -0
  10. data/.rubocop.yml +177 -0
  11. data/.solr_wrapper +8 -0
  12. data/.travis.yml +49 -0
  13. data/CONTRIBUTING.md +181 -0
  14. data/Dockerfile +15 -0
  15. data/Gemfile +52 -0
  16. data/LICENSE +203 -0
  17. data/README.md +203 -0
  18. data/Rakefile +38 -0
  19. data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +56 -0
  20. data/app/assets/config/iiif_print_manifest.js +2 -0
  21. data/app/assets/images/iiif_print/.keep +0 -0
  22. data/app/assets/javascripts/iiif_print/autocomplete_fix.js +33 -0
  23. data/app/assets/javascripts/iiif_print/ocr_search.js.erb +6 -0
  24. data/app/assets/javascripts/iiif_print.js +3 -0
  25. data/app/assets/stylesheets/iiif_print/_iiif_print.scss +4 -0
  26. data/app/assets/stylesheets/iiif_print/_issue_search.scss +13 -0
  27. data/app/assets/stylesheets/iiif_print/_issues_calendar.scss +18 -0
  28. data/app/assets/stylesheets/iiif_print/_newspapers_search.scss +38 -0
  29. data/app/assets/stylesheets/iiif_print/_search_results.scss +6 -0
  30. data/app/helpers/hyrax/iiif_helper.rb +22 -0
  31. data/app/helpers/iiif_print/application_helper.rb +5 -0
  32. data/app/helpers/iiif_print_helper.rb +64 -0
  33. data/app/indexers/concerns/iiif_print/child_indexer.rb +34 -0
  34. data/app/indexers/concerns/iiif_print/file_set_indexer.rb +29 -0
  35. data/app/mailers/iiif_print/application_mailer.rb +8 -0
  36. data/app/models/concerns/iiif_print/set_child_flag.rb +29 -0
  37. data/app/models/concerns/iiif_print/solr/document.rb +47 -0
  38. data/app/models/iiif_print/application_record.rb +6 -0
  39. data/app/models/iiif_print/derivative_attachment.rb +8 -0
  40. data/app/models/iiif_print/iiif_search_response_decorator.rb +17 -0
  41. data/app/models/iiif_print/ingest_file_relation.rb +14 -0
  42. data/app/models/iiif_print/pending_relationship.rb +7 -0
  43. data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +10 -0
  44. data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +33 -0
  45. data/app/presenters/iiif_print/work_show_presenter_decorator.rb +29 -0
  46. data/app/renderers/hyrax/renderers/faceted_attribute_renderer_decorator.rb +18 -0
  47. data/app/search_builders/concerns/iiif_print/exclude_models.rb +17 -0
  48. data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +14 -0
  49. data/app/services/iiif_print/manifest_builder_service_behavior.rb +97 -0
  50. data/app/services/iiif_print/pluggable_derivative_service.rb +120 -0
  51. data/app/views/catalog/_snippets_more.html.erb +16 -0
  52. data/app/views/hyrax/base/_representative_media.html.erb +9 -0
  53. data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +8 -0
  54. data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
  55. data/bin/rails +13 -0
  56. data/config/fcrepo_wrapper_test.yml +5 -0
  57. data/config/initializers/assets.rb +2 -0
  58. data/config/locales/iiif_print.de.yml +148 -0
  59. data/config/locales/iiif_print.en.yml +119 -0
  60. data/config/locales/iiif_print.es.yml +148 -0
  61. data/config/locales/iiif_print.fr.yml +149 -0
  62. data/config/locales/iiif_print.it.yml +142 -0
  63. data/config/locales/iiif_print.pt-BR.yml +148 -0
  64. data/config/locales/iiif_print.zh.yml +142 -0
  65. data/config/solr_wrapper_test.yml +9 -0
  66. data/config/test-fixture/solr-config/_rest_managed.json +3 -0
  67. data/config/test-fixture/solr-config/admin-extra.html +31 -0
  68. data/config/test-fixture/solr-config/elevate.xml +36 -0
  69. data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
  70. data/config/test-fixture/solr-config/protwords.txt +21 -0
  71. data/config/test-fixture/solr-config/schema.xml +366 -0
  72. data/config/test-fixture/solr-config/scripts.conf +24 -0
  73. data/config/test-fixture/solr-config/solrconfig.xml +322 -0
  74. data/config/test-fixture/solr-config/spellings.txt +2 -0
  75. data/config/test-fixture/solr-config/stopwords.txt +58 -0
  76. data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
  77. data/config/test-fixture/solr-config/synonyms.txt +31 -0
  78. data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
  79. data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
  80. data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
  81. data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
  82. data/config/vendor/fits.xml +55 -0
  83. data/config/vendor/imagemagick-6-policy.xml +76 -0
  84. data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +12 -0
  85. data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +11 -0
  86. data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +11 -0
  87. data/docker-compose.yml +129 -0
  88. data/iiif_print.gemspec +43 -0
  89. data/lib/generators/iiif_print/assets_generator.rb +29 -0
  90. data/lib/generators/iiif_print/catalog_controller_generator.rb +32 -0
  91. data/lib/generators/iiif_print/install_generator.rb +52 -0
  92. data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +22 -0
  93. data/lib/generators/iiif_print/templates/iiif_print.scss +1 -0
  94. data/lib/iiif_print/base_derivative_service.rb +113 -0
  95. data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +84 -0
  96. data/lib/iiif_print/catalog_search_builder.rb +31 -0
  97. data/lib/iiif_print/configuration.rb +99 -0
  98. data/lib/iiif_print/data/fileset_helper.rb +25 -0
  99. data/lib/iiif_print/data/path_helper.rb +40 -0
  100. data/lib/iiif_print/data/work_derivatives.rb +323 -0
  101. data/lib/iiif_print/data/work_file.rb +92 -0
  102. data/lib/iiif_print/data/work_files.rb +199 -0
  103. data/lib/iiif_print/data.rb +35 -0
  104. data/lib/iiif_print/engine.rb +77 -0
  105. data/lib/iiif_print/errors.rb +9 -0
  106. data/lib/iiif_print/image_tool.rb +119 -0
  107. data/lib/iiif_print/jobs/application_job.rb +8 -0
  108. data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +107 -0
  109. data/lib/iiif_print/jobs/create_relationships_job.rb +78 -0
  110. data/lib/iiif_print/jp2_derivative_service.rb +118 -0
  111. data/lib/iiif_print/jp2_image_metadata.rb +81 -0
  112. data/lib/iiif_print/lineage_service.rb +41 -0
  113. data/lib/iiif_print/metadata.rb +125 -0
  114. data/lib/iiif_print/pdf_derivative_service.rb +42 -0
  115. data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +75 -0
  116. data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +130 -0
  117. data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +85 -0
  118. data/lib/iiif_print/text_extraction/alto_reader.rb +123 -0
  119. data/lib/iiif_print/text_extraction/hocr_reader.rb +172 -0
  120. data/lib/iiif_print/text_extraction/page_ocr.rb +87 -0
  121. data/lib/iiif_print/text_extraction/render_alto.rb +84 -0
  122. data/lib/iiif_print/text_extraction/word_coords_builder.rb +38 -0
  123. data/lib/iiif_print/text_extraction.rb +11 -0
  124. data/lib/iiif_print/text_extraction_derivative_service.rb +47 -0
  125. data/lib/iiif_print/text_formats_from_alto_service.rb +77 -0
  126. data/lib/iiif_print/tiff_derivative_service.rb +50 -0
  127. data/lib/iiif_print/version.rb +3 -0
  128. data/lib/iiif_print/works_controller_behavior.rb +9 -0
  129. data/lib/iiif_print.rb +136 -0
  130. data/lib/tasks/set_child_works.rake +22 -0
  131. data/spec/.keep.txt +1 -0
  132. data/spec/factories/ability.rb +6 -0
  133. data/spec/factories/newspaper_issue.rb +7 -0
  134. data/spec/factories/newspaper_page.rb +7 -0
  135. data/spec/factories/newspaper_page_solr_document.rb +12 -0
  136. data/spec/factories/newspaper_title.rb +8 -0
  137. data/spec/factories/uploaded_pdf_file.rb +9 -0
  138. data/spec/factories/uploaded_txt_file.rb +9 -0
  139. data/spec/factories/user.rb +13 -0
  140. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  141. data/spec/fixtures/files/4.1.07.tiff +0 -0
  142. data/spec/fixtures/files/README.md +7 -0
  143. data/spec/fixtures/files/alto-2-0.xsd +714 -0
  144. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  145. data/spec/fixtures/files/credits.md +16 -0
  146. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  147. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  148. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  149. data/spec/fixtures/files/minimal-alto.xml +31 -0
  150. data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
  151. data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
  152. data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
  153. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  154. data/spec/fixtures/files/ocr_alto.xml +202 -0
  155. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
  156. data/spec/fixtures/files/ocr_color.tiff +0 -0
  157. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  158. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  159. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  160. data/spec/fixtures/files/ocr_mono_text_hocr.html +78 -0
  161. data/spec/fixtures/files/page1.tiff +0 -0
  162. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  163. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  164. data/spec/fixtures/files/thumbnail.jpg +0 -0
  165. data/spec/helpers/hyrax/iiif_helper_spec.rb +65 -0
  166. data/spec/helpers/iiif_print_helper_spec.rb +43 -0
  167. data/spec/iiif_print/base_derivative_service_spec.rb +11 -0
  168. data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +51 -0
  169. data/spec/iiif_print/catalog_search_builder_spec.rb +60 -0
  170. data/spec/iiif_print/configuration_spec.rb +67 -0
  171. data/spec/iiif_print/data/work_derivatives_spec.rb +245 -0
  172. data/spec/iiif_print/data/work_file_spec.rb +99 -0
  173. data/spec/iiif_print/data/work_files_spec.rb +237 -0
  174. data/spec/iiif_print/image_tool_spec.rb +109 -0
  175. data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +30 -0
  176. data/spec/iiif_print/jobs/create_relationships_job_spec.rb +17 -0
  177. data/spec/iiif_print/jp2_image_metadata_spec.rb +37 -0
  178. data/spec/iiif_print/lineage_service_spec.rb +13 -0
  179. data/spec/iiif_print/metadata_spec.rb +115 -0
  180. data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +6 -0
  181. data/spec/iiif_print/text_extraction/alto_reader_spec.rb +49 -0
  182. data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +45 -0
  183. data/spec/iiif_print/text_extraction/page_ocr_spec.rb +84 -0
  184. data/spec/iiif_print/text_extraction/render_alto_spec.rb +54 -0
  185. data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +44 -0
  186. data/spec/iiif_print_spec.rb +51 -0
  187. data/spec/misc_shared.rb +111 -0
  188. data/spec/models/iiif_print/derivative_attachment_spec.rb +37 -0
  189. data/spec/models/iiif_print/ingest_file_relation_spec.rb +56 -0
  190. data/spec/models/solr_document_spec.rb +14 -0
  191. data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +19 -0
  192. data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +49 -0
  193. data/spec/services/iiif_print/jp2_derivative_service_spec.rb +59 -0
  194. data/spec/services/iiif_print/pdf_derivative_service_spec.rb +66 -0
  195. data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +178 -0
  196. data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +82 -0
  197. data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +127 -0
  198. data/spec/services/iiif_print/tiff_derivative_service_spec.rb +65 -0
  199. data/spec/spec_helper.rb +181 -0
  200. data/spec/support/controller_level_helpers.rb +28 -0
  201. data/spec/support/iiif_print_models.rb +127 -0
  202. data/spec/test_app_templates/blacklight.yml +9 -0
  203. data/spec/test_app_templates/fedora.yml +15 -0
  204. data/spec/test_app_templates/lib/generators/test_app_generator.rb +40 -0
  205. data/spec/test_app_templates/redis.yml +9 -0
  206. data/spec/test_app_templates/solr/conf/schema.xml +362 -0
  207. data/spec/test_app_templates/solr/conf/solrconfig.xml +322 -0
  208. data/spec/test_app_templates/solr.yml +7 -0
  209. data/tasks/iiif_print_dev.rake +34 -0
  210. data/tmp/.keep +0 -0
  211. metadata +605 -0
@@ -0,0 +1,127 @@
1
+ # frozen_string_literal: true
2
+ ###################################################################################################
3
+ #
4
+ # The purpose of this file is to define the models we'll use in our spec application. Some of these
5
+ # models are echoes of what downstream apps will define (e.g. FileSet). Others are for internal
6
+ # modeling purposes only.
7
+ #
8
+ ####################################################################################################
9
+ class FakeDerivativeService # test double for a derivative service plugin; counts create/cleanup calls
10
+ class_attribute :target_extension, default: 'txt' # falls back to 'txt' when no override is supplied
11
+ def initialize(target_extension: nil)
12
+ self.target_extension = target_extension if target_extension # override only when a value is given
13
+ @create_called = 0 # incremented by #create_derivatives
14
+ @cleanup_called = 0 # incremented by #cleanup_derivatives
15
+ end
16
+ attr_reader :create_called, :cleanup_called # read-only counters for spec assertions
17
+
18
+ # Why the #new method?
19
+ #
20
+ # Because the plugin interface assumes we're passing a
21
+ # plugin that responds to `new`. In prod code, that plugin is a class.
22
+ # However, in test, to facilitate observing what methods are called, we pass
23
+ # the plugin as an instance of this class (e.g. `plugin =
24
+ # FakeDerivativeService.new`). Later in the process, the code calls
25
+ # `plugin.new(file_set)`; it is then expected to return something that
26
+ # responds to `create_derivatives` and `cleanup_derivatives`.
27
+ #
28
+ # @see IiifPrint::PluggableDerivativeService#initialize
29
+ # @see IiifPrint::PluggableDerivativeService#services
30
+ #
31
+ # @note FakeDerivativeService.new returns an instance of
32
+ # FakeDerivativeService. Likewise, FakeDerivativeService#new
33
+ # returns that same instance of FakeDerivativeService (i.e. `self`).
34
+ def new(fileset)
35
+ @fileset = fileset # remembered on the instance; not read elsewhere in this class
36
+ self # hand back this same object so its call counters stay observable
37
+ end
38
+
39
+ def valid?
40
+ true # this fake always reports itself as valid
41
+ end
42
+
43
+ def create_derivatives(filename)
44
+ @create_called += 1 # record the invocation
45
+ filename # echo the argument back unchanged
46
+ end
47
+
48
+ def cleanup_derivatives
49
+ @cleanup_called += 1 # record the invocation; the new count is the return value
50
+ end
51
+ end
52
+
53
+ ##
54
+ # iiif_print requires a file set model that is compatible with Hyrax assumptions. We do not want to
55
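+ # add this to app/models because those are loaded in the downstream application, which can create conflicts. (NOTE(review): the original sentence was cut off after "can create" - confirm the intended ending.)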
56
+ class FileSet < ActiveFedora::Base # spec-only file set model
57
+ include ::Hyrax::FileSetBehavior # supplies the Hyrax file set behavior this model relies on
58
+ end
59
+
60
+ class MyWork < ActiveFedora::Base # spec-only work model with Hyrax work behavior and no IiifPrint configuration
61
+ include ::Hyrax::WorkBehavior
62
+ end
63
+
64
+ class MyWorkNeedsDerivative < ActiveFedora::Base # spec-only model; does not include Hyrax::WorkBehavior
65
+ attr_accessor :title # plain Ruby accessor rather than a declared metadata property
66
+ def members
67
+ [] # stub: this model never has members
68
+ end
69
+ end
70
+
71
+ class MyWorkDoesNotNeedDerivative < ActiveFedora::Base # spec-only model; body is identical to MyWorkNeedsDerivative
72
+ attr_accessor :title # plain Ruby accessor rather than a declared metadata property
73
+ def members
74
+ [] # stub: this model never has members
75
+ end
76
+ end
77
+
78
+ class MyIiifConfiguredWorkWithAllDerivativeServices < ActiveFedora::Base # spec-only model using IiifPrint.model_configuration with no overrides
79
+ include IiifPrint.model_configuration # called without arguments; presumably enables every default derivative service, per the class name - confirm
80
+
81
+ attr_accessor :title # plain Ruby accessor rather than a declared metadata property
82
+ def members
83
+ [] # stub: this model never has members
84
+ end
85
+ end
86
+
87
+ class MyIiifConfiguredWork < ActiveFedora::Base # spec-only model configured with the fake derivative plugin
88
+ include IiifPrint.model_configuration(
89
+ derivative_service_plugins: [FakeDerivativeService] # the fake service class is the only plugin listed
90
+ )
91
+ attr_accessor :title # plain Ruby accessor rather than a declared metadata property
92
+ def members
93
+ [] # stub: this model never has members
94
+ end
95
+ end
96
+
97
+ # Newspaper Issue
98
+ class NewspaperIssue < ActiveFedora::Base # spec-only work model with Hyrax work behavior and basic metadata
99
+ # WorkBehavior mixes in the minimal ::Hyrax::CoreMetadata fields:
100
+ # depositor, title, date_uploaded, and date_modified.
101
+ # https://samvera.github.io/customize-metadata-model.html#core-metadata
102
+ include ::Hyrax::WorkBehavior
103
+ # BasicMetadata must be included last
104
+ include ::Hyrax::BasicMetadata
105
+ end
106
+
107
+ # TODO: merge this in with whatever is needed from misc_shared.rb
108
+ class WorkWithIiifPrintConfig < ActiveFedora::Base # spec-only work model that opts in to IiifPrint configuration
109
+ include ::Hyrax::WorkBehavior
110
+ include IiifPrint::SetChildFlag
111
+ include IiifPrint.model_configuration(pdf_split_child_model: WorkWithIiifPrintConfig) # names this same class as the child model for PDF splitting
112
+ include ::Hyrax::BasicMetadata # kept as the last include
113
+
114
+ validates :title, presence: { message: 'Your work must have a title.' }
115
+
116
+ # self.indexer = GenericWorkIndexer (left disabled; no custom indexer is assigned here)
117
+ end
118
+
119
+ class WorkWithOutConfig < ActiveFedora::Base # spec-only work model like WorkWithIiifPrintConfig but without IiifPrint.model_configuration
120
+ include ::Hyrax::WorkBehavior
121
+ include IiifPrint::SetChildFlag
122
+ include ::Hyrax::BasicMetadata # kept as the last include
123
+
124
+ validates :title, presence: { message: 'Your work must have a title.' }
125
+
126
+ # self.indexer = GenericWorkIndexer (left disabled; no custom indexer is assigned here)
127
+ end
@@ -0,0 +1,9 @@
1
+ development: # Solr connection per environment
2
+ adapter: solr
3
+ url: <%= ENV['SOLR_URL'] %>/hyrax # no fallback: SOLR_URL must be set, "/hyrax" is appended to it
4
+ test: &test # the &test anchor is defined here but not referenced anywhere in this file
5
+ adapter: solr
6
+ url: <%= ENV['SOLR_URL'] %>/hyrax_test # no fallback: SOLR_URL must be set, "/hyrax_test" is appended to it
7
+ production:
8
+ adapter: solr
9
+ url: <%= ENV['SOLR_URL'] || "http://127.0.0.1:8983/solr/blacklight-core" %> # unlike the entries above, SOLR_URL is used as the complete URL, with a local default
@@ -0,0 +1,15 @@
1
+ development: # Fedora connection per environment
2
+ user: fedoraAdmin
3
+ password: fedoraAdmin # credentials are hard-coded in this template
4
+ url: <%= ENV['FCREPO_URL'] %>/rest # no fallback: FCREPO_URL must be set, "/rest" is appended to it
5
+ base_path: /dev
6
+ test:
7
+ user: fedoraAdmin
8
+ password: fedoraAdmin # credentials are hard-coded in this template
9
+ url: <%= ENV['FCREPO_URL'] %>/rest # no fallback: FCREPO_URL must be set, "/rest" is appended to it
10
+ base_path: /test
11
+ production:
12
+ user: fedoraAdmin
13
+ password: fedoraAdmin # credentials are hard-coded in this template
14
+ url: http://127.0.0.1:8983/fedora/rest # fixed URL; FCREPO_URL is not consulted for this environment
15
+ base_path: /prod
@@ -0,0 +1,40 @@
1
+ # Test App Generator
2
+ require 'rails/generators'
3
+ require 'byebug'
4
+ class TestAppGenerator < Rails::Generators::Base # generator that prepares the internal test application for the specs
5
+ source_root File.expand_path('../../../spec/test_app_templates', __dir__) # directory that copy_file reads its templates from
6
+
7
+ def install_redis
8
+ gem 'redis', '4.8.0' # add the redis gem, pinned to 4.8.0, to the test app
9
+ Bundler.with_unbundled_env do
10
+ run "bundle install" # run outside the current Bundler environment
11
+ end
12
+ end
13
+
14
+ def install_hyrax
15
+ generate 'hyrax:install', '-f' # -f is passed through to the hyrax:install generator
16
+ end
17
+
18
+ # TODO: not sure why this doesn't work;
19
+ # just copy them manually for the moment
20
+ def install_config_files
21
+ copy_file 'blacklight.yml', 'config/blacklight.yml'
22
+ copy_file 'fedora.yml', 'config/fedora.yml'
23
+ copy_file 'redis.yml', 'config/redis.yml'
24
+ copy_file 'solr.yml', 'config/solr.yml'
25
+ copy_file 'solr/conf/schema.xml', 'solr/conf/schema.xml'
26
+ copy_file 'solr/conf/solrconfig.xml', 'solr/conf/solrconfig.xml'
27
+ end
28
+
29
+ def install_engine
30
+ generate 'iiif_print:install' # run this gem's own install generator
31
+ end
32
+
33
+ def db_migrations
34
+ rake 'db:migrate'
35
+ end
36
+
37
+ def configure_browse_everything
38
+ generate 'browse_everything:config'
39
+ end
40
+ end
@@ -0,0 +1,9 @@
1
+ development: # Redis connection per environment; all three entries are identical
2
+ host: <%= ENV['REDIS_HOST'] || 'localhost' %> # falls back to localhost when REDIS_HOST is unset
3
+ port: 6379
4
+ test:
5
+ host: <%= ENV['REDIS_HOST'] || 'localhost' %> # falls back to localhost when REDIS_HOST is unset
6
+ port: 6379
7
+ production:
8
+ host: <%= ENV['REDIS_HOST'] || 'localhost' %> # falls back to localhost when REDIS_HOST is unset
9
+ port: 6379
@@ -0,0 +1,362 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?>
2
+ <!--
3
+ Licensed to the Apache Software Foundation (ASF) under one or more
4
+ contributor license agreements. See the NOTICE file distributed with
5
+ this work for additional information regarding copyright ownership.
6
+ The ASF licenses this file to You under the Apache License, Version 2.0
7
+ (the "License"); you may not use this file except in compliance with
8
+ the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+ -->
18
+
19
+ <!--
20
+ This is the Solr schema file. This file should be named "schema.xml" and
21
+ should be in the conf directory under the solr home
22
+ (i.e. ./solr/conf/schema.xml by default)
23
+ or located where the classloader for the Solr webapp can find it.
24
+
25
+ This example schema is the recommended starting point for users.
26
+ It should be kept correct and concise, usable out-of-the-box.
27
+
28
+ For more information, on how to customize this file, please see
29
+ http://wiki.apache.org/solr/SchemaXml
30
+
31
+ PERFORMANCE NOTE: this schema includes many optional features and should not
32
+ be used for benchmarking. To improve performance one could
33
+ - set stored="false" for all fields possible (esp large fields) when you
34
+ only need to search on the field but don't need to return the original
35
+ value.
36
+ - set indexed="false" if you don't need to search on the field, but only
37
+ return the field as a result of searching on other indexed fields.
38
+ - remove all unneeded copyField statements
39
+ - for best index size and searching performance, set indexed="false"
40
+ for all general text fields, use copyField to copy them to the
41
+ catchall "text" field, and use that for searching.
42
+ - For maximum indexing performance, use the StreamingUpdateSolrServer
43
+ java client.
44
+ - Remember to run the JVM in server mode, and use a higher logging level
45
+ that avoids logging every request
46
+ -->
47
+
48
+ <schema name="Hydra Demo Index" version="1.5">
49
+ <!-- attribute "name" is the name of this schema and is only used for display purposes.
50
+ Applications should change this to reflect the nature of the search collection.
51
+ version="1.5" is Solr's version number for the schema syntax and semantics. It should
52
+ not normally be changed by applications.
53
+ 1.0: multiValued attribute did not exist, all fields are multiValued by nature
54
+ 1.1: multiValued attribute introduced, false by default
55
+ 1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields.
56
+ 1.3: removed optional field compress feature
57
+ 1.4: default auto-phrase (QueryParser feature) to off; 1.5: omitNorms defaults to true for primitive field types (int, float, boolean, string...)
58
+ -->
59
+
60
+ <types>
61
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
62
+ <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
63
+ <fieldType name="rand" class="solr.RandomSortField" omitNorms="true"/>
64
+
65
+ <!-- Default numeric field types. -->
66
+ <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
67
+ <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
68
+ <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
69
+ <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
70
+
71
+ <!-- trie numeric field types for faster range queries -->
72
+ <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
73
+ <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
74
+ <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
75
+ <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
76
+
77
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z
78
+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
79
+ -->
80
+ <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
81
+ <!-- A Trie based date field for faster date range queries and date faceting. -->
82
+ <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
83
+
84
+ <!-- This point type indexes the coordinates as separate fields (subFields)
85
+ If subFieldType is defined, it references a type, and a dynamic field
86
+ definition is created matching *___<typename>. Alternately, if
87
+ subFieldSuffix is defined, that is used to create the subFields.
88
+ Example: if subFieldType="double", then the coordinates would be
89
+ indexed in fields myloc_0___double,myloc_1___double.
90
+ Example: if subFieldSuffix="_d" then the coordinates would be indexed
91
+ in fields myloc_0_d,myloc_1_d
92
+ The subFields are an implementation detail of the fieldType, and end
93
+ users normally should not need to know about them.
94
+ -->
95
+ <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
96
+
97
+ <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
98
+ <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
99
+
100
+ <!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes.
101
+ For more information about this and other Spatial fields new to Solr 4, see:
102
+ http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
103
+ -->
104
+ <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
105
+ geo="true" distErrPct="0.025" maxDistErr="0.000009" distanceUnits="degrees" />
106
+
107
+ <fieldType name="text" class="solr.TextField" omitNorms="false">
108
+ <analyzer>
109
+ <tokenizer class="solr.ICUTokenizerFactory"/>
110
+ <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
111
+ <filter class="solr.TrimFilterFactory"/>
112
+ </analyzer>
113
+ </fieldType>
114
+
115
+ <!-- A text field that only splits on whitespace for exact matching of words -->
116
+ <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
117
+ <analyzer>
118
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
119
+ <filter class="solr.TrimFilterFactory"/>
120
+ </analyzer>
121
+ </fieldType>
122
+
123
+ <!-- single token analyzed text, for sorting. Punctuation is significant. -->
124
+ <fieldtype name="alphaSort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
125
+ <analyzer>
126
+ <tokenizer class="solr.KeywordTokenizerFactory" />
127
+ <filter class="solr.ICUFoldingFilterFactory"/>
128
+ <filter class="solr.TrimFilterFactory" />
129
+ </analyzer>
130
+ </fieldtype>
131
+
132
+ <!-- A text field with defaults appropriate for English -->
133
+ <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
134
+ <analyzer>
135
+ <tokenizer class="solr.ICUTokenizerFactory"/>
136
+ <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
137
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
138
+ <!-- EnglishMinimalStemFilterFactory is less aggressive than PorterStemFilterFactory: -->
139
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
140
+ <!--
141
+ <filter class="solr.PorterStemFilterFactory"/>
142
+ -->
143
+ <filter class="solr.TrimFilterFactory"/>
144
+ </analyzer>
145
+ </fieldType>
146
+
147
+ <!-- queries for paths match documents at that path, or in descendent paths -->
148
+ <fieldType name="descendent_path" class="solr.TextField">
149
+ <analyzer type="index">
150
+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
151
+ </analyzer>
152
+ <analyzer type="query">
153
+ <tokenizer class="solr.KeywordTokenizerFactory" />
154
+ </analyzer>
155
+ </fieldType>
156
+
157
+ <!-- queries for paths match documents at that path, or in ancestor paths -->
158
+ <fieldType name="ancestor_path" class="solr.TextField">
159
+ <analyzer type="index">
160
+ <tokenizer class="solr.KeywordTokenizerFactory" />
161
+ </analyzer>
162
+ <analyzer type="query">
163
+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
164
+ </analyzer>
165
+ </fieldType>
166
+
167
+ <fieldType class="solr.TextField" name="textSuggest" positionIncrementGap="100">
168
+ <analyzer>
169
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
170
+ <filter class="solr.LowerCaseFilterFactory"/>
171
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
172
+ </analyzer>
173
+ </fieldType>
174
+ </types>
175
+
176
+ <fields>
177
+ <!-- If you remove this field, you must _also_ disable the update log in solrconfig.xml
178
+ or Solr won't start. _version_ and update log are required for SolrCloud
179
+ -->
180
+ <field name="_version_" type="long" indexed="true" stored="true"/>
181
+
182
+ <field name="id" type="string" stored="true" indexed="true" multiValued="false" required="true"/>
183
+ <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
184
+
185
+ <field name="lat" type="tdouble" stored="true" indexed="true" multiValued="false"/>
186
+ <field name="lng" type="tdouble" stored="true" indexed="true" multiValued="false"/>
187
+
188
+ <!-- NOTE: not all possible Solr field types are represented in the dynamic fields -->
189
+
190
+ <!-- text (_t...) -->
191
+ <dynamicField name="*_ti" type="text" stored="false" indexed="true" multiValued="false"/>
192
+ <dynamicField name="*_tim" type="text" stored="false" indexed="true" multiValued="true"/>
193
+ <dynamicField name="*_ts" type="text" stored="true" indexed="false" multiValued="false"/>
194
+ <dynamicField name="*_tsm" type="text" stored="true" indexed="false" multiValued="true"/>
195
+ <dynamicField name="*_tsi" type="text" stored="true" indexed="true" multiValued="false"/>
196
+ <dynamicField name="*_tsim" type="text" stored="true" indexed="true" multiValued="true"/>
197
+ <dynamicField name="*_tiv" type="text" stored="false" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
198
+ <dynamicField name="*_timv" type="text" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
199
+ <dynamicField name="*_tsiv" type="text" stored="true" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
200
+ <dynamicField name="*_tsimv" type="text" stored="true" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
201
+
202
+ <!-- English text (_te...) -->
203
+ <dynamicField name="*_tei" type="text_en" stored="false" indexed="true" multiValued="false"/>
204
+ <dynamicField name="*_teim" type="text_en" stored="false" indexed="true" multiValued="true"/>
205
+ <dynamicField name="*_tes" type="text_en" stored="true" indexed="false" multiValued="false"/>
206
+ <dynamicField name="*_tesm" type="text_en" stored="true" indexed="false" multiValued="true"/>
207
+ <dynamicField name="*_tesi" type="text_en" stored="true" indexed="true" multiValued="false"/>
208
+ <dynamicField name="*_tesim" type="text_en" stored="true" indexed="true" multiValued="true"/>
209
+ <dynamicField name="*_teiv" type="text_en" stored="false" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
210
+ <dynamicField name="*_teimv" type="text_en" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
211
+ <dynamicField name="*_tesiv" type="text_en" stored="true" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
212
+ <dynamicField name="*_tesimv" type="text_en" stored="true" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
213
+
214
+ <!-- string (_s...) -->
215
+ <dynamicField name="*_si" type="string" stored="false" indexed="true" multiValued="false"/>
216
+ <dynamicField name="*_sim" type="string" stored="false" indexed="true" multiValued="true"/>
217
+ <dynamicField name="*_ss" type="string" stored="true" indexed="false" multiValued="false"/>
218
+ <dynamicField name="*_ssm" type="string" stored="true" indexed="false" multiValued="true"/>
219
+ <dynamicField name="*_ssi" type="string" stored="true" indexed="true" multiValued="false"/>
220
+ <dynamicField name="*_ssim" type="string" stored="true" indexed="true" multiValued="true"/>
221
+ <dynamicField name="*_ssort" type="alphaSort" stored="false" indexed="true" multiValued="false"/>
222
+
223
+ <!-- integer (_i...) -->
224
+ <dynamicField name="*_ii" type="int" stored="false" indexed="true" multiValued="false"/>
225
+ <dynamicField name="*_iim" type="int" stored="false" indexed="true" multiValued="true"/>
226
+ <dynamicField name="*_is" type="int" stored="true" indexed="false" multiValued="false"/>
227
+ <dynamicField name="*_ism" type="int" stored="true" indexed="false" multiValued="true"/>
228
+ <dynamicField name="*_isi" type="int" stored="true" indexed="true" multiValued="false"/>
229
+ <dynamicField name="*_isim" type="int" stored="true" indexed="true" multiValued="true"/>
230
+
231
+ <!-- trie integer (_it...) (for faster range queries) -->
232
+ <dynamicField name="*_iti" type="tint" stored="false" indexed="true" multiValued="false"/>
233
+ <dynamicField name="*_itim" type="tint" stored="false" indexed="true" multiValued="true"/>
234
+ <dynamicField name="*_its" type="tint" stored="true" indexed="false" multiValued="false"/>
235
+ <dynamicField name="*_itsm" type="tint" stored="true" indexed="false" multiValued="true"/>
236
+ <dynamicField name="*_itsi" type="tint" stored="true" indexed="true" multiValued="false"/>
237
+ <dynamicField name="*_itsim" type="tint" stored="true" indexed="true" multiValued="true"/>
238
+
239
+ <!-- date (_dt...) -->
240
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z
241
+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z -->
242
+ <dynamicField name="*_dti" type="date" stored="false" indexed="true" multiValued="false"/>
243
+ <dynamicField name="*_dtim" type="date" stored="false" indexed="true" multiValued="true"/>
244
+ <dynamicField name="*_dts" type="date" stored="true" indexed="false" multiValued="false"/>
245
+ <dynamicField name="*_dtsm" type="date" stored="true" indexed="false" multiValued="true"/>
246
+ <dynamicField name="*_dtsi" type="date" stored="true" indexed="true" multiValued="false"/>
247
+ <dynamicField name="*_dtsim" type="date" stored="true" indexed="true" multiValued="true"/>
248
+
249
+ <!-- trie date (_dtt...) (for faster range queries) -->
250
+ <dynamicField name="*_dtti" type="tdate" stored="false" indexed="true" multiValued="false"/>
251
+ <dynamicField name="*_dttim" type="tdate" stored="false" indexed="true" multiValued="true"/>
252
+ <dynamicField name="*_dtts" type="tdate" stored="true" indexed="false" multiValued="false"/>
253
+ <dynamicField name="*_dttsm" type="tdate" stored="true" indexed="false" multiValued="true"/>
254
+ <dynamicField name="*_dttsi" type="tdate" stored="true" indexed="true" multiValued="false"/>
255
+ <dynamicField name="*_dttsim" type="tdate" stored="true" indexed="true" multiValued="true"/>
256
+
257
+ <!-- long (_l...) -->
258
+ <dynamicField name="*_li" type="long" stored="false" indexed="true" multiValued="false"/>
259
+ <dynamicField name="*_lim" type="long" stored="false" indexed="true" multiValued="true"/>
260
+ <dynamicField name="*_ls" type="long" stored="true" indexed="false" multiValued="false"/>
261
+ <dynamicField name="*_lsm" type="long" stored="true" indexed="false" multiValued="true"/>
262
+ <dynamicField name="*_lsi" type="long" stored="true" indexed="true" multiValued="false"/>
263
+ <dynamicField name="*_lsim" type="long" stored="true" indexed="true" multiValued="true"/>
264
+
265
+ <!-- trie long (_lt...) (for faster range queries) -->
266
+ <dynamicField name="*_lti" type="tlong" stored="false" indexed="true" multiValued="false"/>
267
+ <dynamicField name="*_ltim" type="tlong" stored="false" indexed="true" multiValued="true"/>
268
+ <dynamicField name="*_lts" type="tlong" stored="true" indexed="false" multiValued="false"/>
269
+ <dynamicField name="*_ltsm" type="tlong" stored="true" indexed="false" multiValued="true"/>
270
+ <dynamicField name="*_ltsi" type="tlong" stored="true" indexed="true" multiValued="false"/>
271
+ <dynamicField name="*_ltsim" type="tlong" stored="true" indexed="true" multiValued="true"/>
272
+
273
+ <!-- double (_db...) -->
274
+ <dynamicField name="*_dbi" type="double" stored="false" indexed="true" multiValued="false"/>
275
+ <dynamicField name="*_dbim" type="double" stored="false" indexed="true" multiValued="true"/>
276
+ <dynamicField name="*_dbs" type="double" stored="true" indexed="false" multiValued="false"/>
277
+ <dynamicField name="*_dbsm" type="double" stored="true" indexed="false" multiValued="true"/>
278
+ <dynamicField name="*_dbsi" type="double" stored="true" indexed="true" multiValued="false"/>
279
+ <dynamicField name="*_dbsim" type="double" stored="true" indexed="true" multiValued="true"/>
280
+
281
+ <!-- trie double (_dbt...) (for faster range queries) -->
282
+ <dynamicField name="*_dbti" type="tdouble" stored="false" indexed="true" multiValued="false"/>
283
+ <dynamicField name="*_dbtim" type="tdouble" stored="false" indexed="true" multiValued="true"/>
284
+ <dynamicField name="*_dbts" type="tdouble" stored="true" indexed="false" multiValued="false"/>
285
+ <dynamicField name="*_dbtsm" type="tdouble" stored="true" indexed="false" multiValued="true"/>
286
+ <dynamicField name="*_dbtsi" type="tdouble" stored="true" indexed="true" multiValued="false"/>
287
+ <dynamicField name="*_dbtsim" type="tdouble" stored="true" indexed="true" multiValued="true"/>
288
+
289
+ <!-- float (_f...) -->
290
+ <dynamicField name="*_fi" type="float" stored="false" indexed="true" multiValued="false"/>
291
+ <dynamicField name="*_fim" type="float" stored="false" indexed="true" multiValued="true"/>
292
+ <dynamicField name="*_fs" type="float" stored="true" indexed="false" multiValued="false"/>
293
+ <dynamicField name="*_fsm" type="float" stored="true" indexed="false" multiValued="true"/>
294
+ <dynamicField name="*_fsi" type="float" stored="true" indexed="true" multiValued="false"/>
295
+ <dynamicField name="*_fsim" type="float" stored="true" indexed="true" multiValued="true"/>
296
+
297
+ <!-- trie float (_ft...) (for faster range queries) -->
298
+ <dynamicField name="*_fti" type="tfloat" stored="false" indexed="true" multiValued="false"/>
299
+ <dynamicField name="*_ftim" type="tfloat" stored="false" indexed="true" multiValued="true"/>
300
+ <dynamicField name="*_fts" type="tfloat" stored="true" indexed="false" multiValued="false"/>
301
+ <dynamicField name="*_ftsm" type="tfloat" stored="true" indexed="false" multiValued="true"/>
302
+ <dynamicField name="*_ftsi" type="tfloat" stored="true" indexed="true" multiValued="false"/>
303
+ <dynamicField name="*_ftsim" type="tfloat" stored="true" indexed="true" multiValued="true"/>
304
+
305
+ <!-- boolean (_b...) -->
306
+ <dynamicField name="*_bi" type="boolean" stored="false" indexed="true" multiValued="false"/>
307
+ <dynamicField name="*_bs" type="boolean" stored="true" indexed="false" multiValued="false"/>
308
+ <dynamicField name="*_bsi" type="boolean" stored="true" indexed="true" multiValued="false"/>
309
+
310
+ <!-- Type used to index the lat and lon components for the "location" FieldType -->
311
+ <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />
312
+
313
+ <!-- location (_ll...) -->
314
+ <dynamicField name="*_lli" type="location" stored="false" indexed="true" multiValued="false"/>
315
+ <dynamicField name="*_llim" type="location" stored="false" indexed="true" multiValued="true"/>
316
+ <dynamicField name="*_lls" type="location" stored="true" indexed="false" multiValued="false"/>
317
+ <dynamicField name="*_llsm" type="location" stored="true" indexed="false" multiValued="true"/>
318
+ <dynamicField name="*_llsi" type="location" stored="true" indexed="true" multiValued="false"/>
319
+ <dynamicField name="*_llsim" type="location" stored="true" indexed="true" multiValued="true"/>
320
+
321
+ <dynamicField name="*suggest" type="textSuggest" indexed="true" stored="false" multiValued="true" />
322
+
323
+ <!-- you must define copyField source and dest fields explicitly or schemaBrowser doesn't work -->
324
+ <field name="all_text_timv" type="text" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
325
+
326
+ </fields>
327
+
328
+ <!-- Field to use to determine and enforce document uniqueness.
329
+ Unless this field is marked with required="false", it will be a required field
330
+ -->
331
+ <uniqueKey>id</uniqueKey>
332
+
333
+ <!-- copyField commands copy one field to another at the time a document
334
+ is added to the index. It's used either to index the same field differently,
335
+ or to add multiple fields to the same field for easier/faster searching. -->
336
+ <!-- Copy Fields -->
337
+
338
+ <!-- Above, multiple source fields are copied to the [text] field.
339
+ Another way to map multiple source fields to the same
340
+ destination field is to use the dynamic field syntax.
341
+ copyField also supports a maxChars to copy setting. -->
342
+
343
+ <!-- <copyField source="*_tesim" dest="all_text_timv" maxChars="3000"/> -->
344
+ <!-- for suggestions -->
345
+ <copyField source="*_tesim" dest="suggest"/>
346
+ <copyField source="*_ssim" dest="suggest"/>
347
+
348
+ <!-- Similarity is the scoring routine for each document vs. a query.
349
+ A custom similarity may be specified here, but the default is fine
350
+ for most applications. -->
351
+ <!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
352
+ <!-- ... OR ...
353
+ Specify a SimilarityFactory class name implementation
354
+ allowing parameters to be used.
355
+ -->
356
+ <!--
357
+ <similarity class="com.example.solr.CustomSimilarityFactory">
358
+ <str name="paramkey">param value</str>
359
+ </similarity>
360
+ -->
361
+
362
+ </schema>