iiif_print 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (211) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.env +5 -0
  4. data/.fcrepo_wrapper +4 -0
  5. data/.github/release.yml +20 -0
  6. data/.github/workflows/branches.yml +24 -0
  7. data/.github/workflows/build-lint-test-action.yaml +33 -0
  8. data/.github/workflows/release_labels.yml +25 -0
  9. data/.gitignore +52 -0
  10. data/.rubocop.yml +177 -0
  11. data/.solr_wrapper +8 -0
  12. data/.travis.yml +49 -0
  13. data/CONTRIBUTING.md +181 -0
  14. data/Dockerfile +15 -0
  15. data/Gemfile +52 -0
  16. data/LICENSE +203 -0
  17. data/README.md +203 -0
  18. data/Rakefile +38 -0
  19. data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +56 -0
  20. data/app/assets/config/iiif_print_manifest.js +2 -0
  21. data/app/assets/images/iiif_print/.keep +0 -0
  22. data/app/assets/javascripts/iiif_print/autocomplete_fix.js +33 -0
  23. data/app/assets/javascripts/iiif_print/ocr_search.js.erb +6 -0
  24. data/app/assets/javascripts/iiif_print.js +3 -0
  25. data/app/assets/stylesheets/iiif_print/_iiif_print.scss +4 -0
  26. data/app/assets/stylesheets/iiif_print/_issue_search.scss +13 -0
  27. data/app/assets/stylesheets/iiif_print/_issues_calendar.scss +18 -0
  28. data/app/assets/stylesheets/iiif_print/_newspapers_search.scss +38 -0
  29. data/app/assets/stylesheets/iiif_print/_search_results.scss +6 -0
  30. data/app/helpers/hyrax/iiif_helper.rb +22 -0
  31. data/app/helpers/iiif_print/application_helper.rb +5 -0
  32. data/app/helpers/iiif_print_helper.rb +64 -0
  33. data/app/indexers/concerns/iiif_print/child_indexer.rb +34 -0
  34. data/app/indexers/concerns/iiif_print/file_set_indexer.rb +29 -0
  35. data/app/mailers/iiif_print/application_mailer.rb +8 -0
  36. data/app/models/concerns/iiif_print/set_child_flag.rb +29 -0
  37. data/app/models/concerns/iiif_print/solr/document.rb +47 -0
  38. data/app/models/iiif_print/application_record.rb +6 -0
  39. data/app/models/iiif_print/derivative_attachment.rb +8 -0
  40. data/app/models/iiif_print/iiif_search_response_decorator.rb +17 -0
  41. data/app/models/iiif_print/ingest_file_relation.rb +14 -0
  42. data/app/models/iiif_print/pending_relationship.rb +7 -0
  43. data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +10 -0
  44. data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +33 -0
  45. data/app/presenters/iiif_print/work_show_presenter_decorator.rb +29 -0
  46. data/app/renderers/hyrax/renderers/faceted_attribute_renderer_decorator.rb +18 -0
  47. data/app/search_builders/concerns/iiif_print/exclude_models.rb +17 -0
  48. data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +14 -0
  49. data/app/services/iiif_print/manifest_builder_service_behavior.rb +97 -0
  50. data/app/services/iiif_print/pluggable_derivative_service.rb +120 -0
  51. data/app/views/catalog/_snippets_more.html.erb +16 -0
  52. data/app/views/hyrax/base/_representative_media.html.erb +9 -0
  53. data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +8 -0
  54. data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
  55. data/bin/rails +13 -0
  56. data/config/fcrepo_wrapper_test.yml +5 -0
  57. data/config/initializers/assets.rb +2 -0
  58. data/config/locales/iiif_print.de.yml +148 -0
  59. data/config/locales/iiif_print.en.yml +119 -0
  60. data/config/locales/iiif_print.es.yml +148 -0
  61. data/config/locales/iiif_print.fr.yml +149 -0
  62. data/config/locales/iiif_print.it.yml +142 -0
  63. data/config/locales/iiif_print.pt-BR.yml +148 -0
  64. data/config/locales/iiif_print.zh.yml +142 -0
  65. data/config/solr_wrapper_test.yml +9 -0
  66. data/config/test-fixture/solr-config/_rest_managed.json +3 -0
  67. data/config/test-fixture/solr-config/admin-extra.html +31 -0
  68. data/config/test-fixture/solr-config/elevate.xml +36 -0
  69. data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
  70. data/config/test-fixture/solr-config/protwords.txt +21 -0
  71. data/config/test-fixture/solr-config/schema.xml +366 -0
  72. data/config/test-fixture/solr-config/scripts.conf +24 -0
  73. data/config/test-fixture/solr-config/solrconfig.xml +322 -0
  74. data/config/test-fixture/solr-config/spellings.txt +2 -0
  75. data/config/test-fixture/solr-config/stopwords.txt +58 -0
  76. data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
  77. data/config/test-fixture/solr-config/synonyms.txt +31 -0
  78. data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
  79. data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
  80. data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
  81. data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
  82. data/config/vendor/fits.xml +55 -0
  83. data/config/vendor/imagemagick-6-policy.xml +76 -0
  84. data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +12 -0
  85. data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +11 -0
  86. data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +11 -0
  87. data/docker-compose.yml +129 -0
  88. data/iiif_print.gemspec +43 -0
  89. data/lib/generators/iiif_print/assets_generator.rb +29 -0
  90. data/lib/generators/iiif_print/catalog_controller_generator.rb +32 -0
  91. data/lib/generators/iiif_print/install_generator.rb +52 -0
  92. data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +22 -0
  93. data/lib/generators/iiif_print/templates/iiif_print.scss +1 -0
  94. data/lib/iiif_print/base_derivative_service.rb +113 -0
  95. data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +84 -0
  96. data/lib/iiif_print/catalog_search_builder.rb +31 -0
  97. data/lib/iiif_print/configuration.rb +99 -0
  98. data/lib/iiif_print/data/fileset_helper.rb +25 -0
  99. data/lib/iiif_print/data/path_helper.rb +40 -0
  100. data/lib/iiif_print/data/work_derivatives.rb +323 -0
  101. data/lib/iiif_print/data/work_file.rb +92 -0
  102. data/lib/iiif_print/data/work_files.rb +199 -0
  103. data/lib/iiif_print/data.rb +35 -0
  104. data/lib/iiif_print/engine.rb +77 -0
  105. data/lib/iiif_print/errors.rb +9 -0
  106. data/lib/iiif_print/image_tool.rb +119 -0
  107. data/lib/iiif_print/jobs/application_job.rb +8 -0
  108. data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +107 -0
  109. data/lib/iiif_print/jobs/create_relationships_job.rb +78 -0
  110. data/lib/iiif_print/jp2_derivative_service.rb +118 -0
  111. data/lib/iiif_print/jp2_image_metadata.rb +81 -0
  112. data/lib/iiif_print/lineage_service.rb +41 -0
  113. data/lib/iiif_print/metadata.rb +125 -0
  114. data/lib/iiif_print/pdf_derivative_service.rb +42 -0
  115. data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +75 -0
  116. data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +130 -0
  117. data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +85 -0
  118. data/lib/iiif_print/text_extraction/alto_reader.rb +123 -0
  119. data/lib/iiif_print/text_extraction/hocr_reader.rb +172 -0
  120. data/lib/iiif_print/text_extraction/page_ocr.rb +87 -0
  121. data/lib/iiif_print/text_extraction/render_alto.rb +84 -0
  122. data/lib/iiif_print/text_extraction/word_coords_builder.rb +38 -0
  123. data/lib/iiif_print/text_extraction.rb +11 -0
  124. data/lib/iiif_print/text_extraction_derivative_service.rb +47 -0
  125. data/lib/iiif_print/text_formats_from_alto_service.rb +77 -0
  126. data/lib/iiif_print/tiff_derivative_service.rb +50 -0
  127. data/lib/iiif_print/version.rb +3 -0
  128. data/lib/iiif_print/works_controller_behavior.rb +9 -0
  129. data/lib/iiif_print.rb +136 -0
  130. data/lib/tasks/set_child_works.rake +22 -0
  131. data/spec/.keep.txt +1 -0
  132. data/spec/factories/ability.rb +6 -0
  133. data/spec/factories/newspaper_issue.rb +7 -0
  134. data/spec/factories/newspaper_page.rb +7 -0
  135. data/spec/factories/newspaper_page_solr_document.rb +12 -0
  136. data/spec/factories/newspaper_title.rb +8 -0
  137. data/spec/factories/uploaded_pdf_file.rb +9 -0
  138. data/spec/factories/uploaded_txt_file.rb +9 -0
  139. data/spec/factories/user.rb +13 -0
  140. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  141. data/spec/fixtures/files/4.1.07.tiff +0 -0
  142. data/spec/fixtures/files/README.md +7 -0
  143. data/spec/fixtures/files/alto-2-0.xsd +714 -0
  144. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  145. data/spec/fixtures/files/credits.md +16 -0
  146. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  147. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  148. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  149. data/spec/fixtures/files/minimal-alto.xml +31 -0
  150. data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
  151. data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
  152. data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
  153. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  154. data/spec/fixtures/files/ocr_alto.xml +202 -0
  155. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
  156. data/spec/fixtures/files/ocr_color.tiff +0 -0
  157. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  158. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  159. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  160. data/spec/fixtures/files/ocr_mono_text_hocr.html +78 -0
  161. data/spec/fixtures/files/page1.tiff +0 -0
  162. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  163. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  164. data/spec/fixtures/files/thumbnail.jpg +0 -0
  165. data/spec/helpers/hyrax/iiif_helper_spec.rb +65 -0
  166. data/spec/helpers/iiif_print_helper_spec.rb +43 -0
  167. data/spec/iiif_print/base_derivative_service_spec.rb +11 -0
  168. data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +51 -0
  169. data/spec/iiif_print/catalog_search_builder_spec.rb +60 -0
  170. data/spec/iiif_print/configuration_spec.rb +67 -0
  171. data/spec/iiif_print/data/work_derivatives_spec.rb +245 -0
  172. data/spec/iiif_print/data/work_file_spec.rb +99 -0
  173. data/spec/iiif_print/data/work_files_spec.rb +237 -0
  174. data/spec/iiif_print/image_tool_spec.rb +109 -0
  175. data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +30 -0
  176. data/spec/iiif_print/jobs/create_relationships_job_spec.rb +17 -0
  177. data/spec/iiif_print/jp2_image_metadata_spec.rb +37 -0
  178. data/spec/iiif_print/lineage_service_spec.rb +13 -0
  179. data/spec/iiif_print/metadata_spec.rb +115 -0
  180. data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +6 -0
  181. data/spec/iiif_print/text_extraction/alto_reader_spec.rb +49 -0
  182. data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +45 -0
  183. data/spec/iiif_print/text_extraction/page_ocr_spec.rb +84 -0
  184. data/spec/iiif_print/text_extraction/render_alto_spec.rb +54 -0
  185. data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +44 -0
  186. data/spec/iiif_print_spec.rb +51 -0
  187. data/spec/misc_shared.rb +111 -0
  188. data/spec/models/iiif_print/derivative_attachment_spec.rb +37 -0
  189. data/spec/models/iiif_print/ingest_file_relation_spec.rb +56 -0
  190. data/spec/models/solr_document_spec.rb +14 -0
  191. data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +19 -0
  192. data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +49 -0
  193. data/spec/services/iiif_print/jp2_derivative_service_spec.rb +59 -0
  194. data/spec/services/iiif_print/pdf_derivative_service_spec.rb +66 -0
  195. data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +178 -0
  196. data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +82 -0
  197. data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +127 -0
  198. data/spec/services/iiif_print/tiff_derivative_service_spec.rb +65 -0
  199. data/spec/spec_helper.rb +181 -0
  200. data/spec/support/controller_level_helpers.rb +28 -0
  201. data/spec/support/iiif_print_models.rb +127 -0
  202. data/spec/test_app_templates/blacklight.yml +9 -0
  203. data/spec/test_app_templates/fedora.yml +15 -0
  204. data/spec/test_app_templates/lib/generators/test_app_generator.rb +40 -0
  205. data/spec/test_app_templates/redis.yml +9 -0
  206. data/spec/test_app_templates/solr/conf/schema.xml +362 -0
  207. data/spec/test_app_templates/solr/conf/solrconfig.xml +322 -0
  208. data/spec/test_app_templates/solr.yml +7 -0
  209. data/tasks/iiif_print_dev.rake +34 -0
  210. data/tmp/.keep +0 -0
  211. metadata +605 -0
@@ -0,0 +1,127 @@
1
+ # frozen_string_literal: true
2
+ ###################################################################################################
3
+ #
4
+ # The purpose of this file is to define the models we'll use in our spec application. Some of these
5
+ # models are echoes of what downstream apps will define (e.g. FileSet). Others are for internal
6
+ # modeling purposes only.
7
+ #
8
+ ####################################################################################################
9
+ class FakeDerivativeService
10
+ class_attribute :target_extension, default: 'txt'
11
+ def initialize(target_extension: nil)
12
+ self.target_extension = target_extension if target_extension
13
+ @create_called = 0
14
+ @cleanup_called = 0
15
+ end
16
+ attr_reader :create_called, :cleanup_called
17
+
18
+ # Why the #new method?
19
+ #
20
+ # Because the plugin interface assumes we're passing a
21
+ # plugin that responds to `new`. In prod code, that plugin is a class.
22
+ # However, in test, to facilitate observing what methods are called we pass
23
+ # the plugin as an instance of this class (e.g. `plugin =
24
+ # FakeDerivativeService.new`). Later, in the process, the code calls
25
+ # `plugin.new(file_set)`; it is then expected to return something that
26
+ # responds to `create_derivatives` and `cleanup_derivatives`.
27
+ #
28
+ # @see IiifPrint::PluggableDerivativeService#initialize
29
+ # @see IiifPrint::PluggableDerivativeService#services
30
+ #
31
+ # @note FakeDerivativeService.new returns an instance of
32
+ # FakeDerivativeService. Likewise, FakeDerivativeService#new will now
33
+ # return an instance of FakeDerivativeService
34
+ def new(fileset)
35
+ @fileset = fileset
36
+ self
37
+ end
38
+
39
+ def valid?
40
+ true
41
+ end
42
+
43
+ def create_derivatives(filename)
44
+ @create_called += 1
45
+ filename
46
+ end
47
+
48
+ def cleanup_derivatives
49
+ @cleanup_called += 1
50
+ end
51
+ end
52
+
53
+ ##
54
+ # iiif_print requires a file set model that is compatible with Hyrax assumptions. We do not want to
55
+ # add this to app/models because those are loaded in the downstream application; which can create
56
+ class FileSet < ActiveFedora::Base
57
+ include ::Hyrax::FileSetBehavior
58
+ end
59
+
60
+ class MyWork < ActiveFedora::Base
61
+ include ::Hyrax::WorkBehavior
62
+ end
63
+
64
+ class MyWorkNeedsDerivative < ActiveFedora::Base
65
+ attr_accessor :title
66
+ def members
67
+ []
68
+ end
69
+ end
70
+
71
+ class MyWorkDoesNotNeedDerivative < ActiveFedora::Base
72
+ attr_accessor :title
73
+ def members
74
+ []
75
+ end
76
+ end
77
+
78
+ class MyIiifConfiguredWorkWithAllDerivativeServices < ActiveFedora::Base
79
+ include IiifPrint.model_configuration
80
+
81
+ attr_accessor :title
82
+ def members
83
+ []
84
+ end
85
+ end
86
+
87
+ class MyIiifConfiguredWork < ActiveFedora::Base
88
+ include IiifPrint.model_configuration(
89
+ derivative_service_plugins: [FakeDerivativeService]
90
+ )
91
+ attr_accessor :title
92
+ def members
93
+ []
94
+ end
95
+ end
96
+
97
+ # Newspaper Issue
98
+ class NewspaperIssue < ActiveFedora::Base
99
+ # WorkBehavior mixes in minimal ::Hyrax::CoreMetadata fields of
100
+ # depositor, title, date_uploaded, and date_modified.
101
+ # https://samvera.github.io/customize-metadata-model.html#core-metadata
102
+ include ::Hyrax::WorkBehavior
103
+ # BasicMetadata must be included last
104
+ include ::Hyrax::BasicMetadata
105
+ end
106
+
107
+ # TODO: merge this in with whatever is needed from misc_shared.rb
108
+ class WorkWithIiifPrintConfig < ActiveFedora::Base
109
+ include ::Hyrax::WorkBehavior
110
+ include IiifPrint::SetChildFlag
111
+ include IiifPrint.model_configuration(pdf_split_child_model: WorkWithIiifPrintConfig)
112
+ include ::Hyrax::BasicMetadata
113
+
114
+ validates :title, presence: { message: 'Your work must have a title.' }
115
+
116
+ # self.indexer = GenericWorkIndexer
117
+ end
118
+
119
+ class WorkWithOutConfig < ActiveFedora::Base
120
+ include ::Hyrax::WorkBehavior
121
+ include IiifPrint::SetChildFlag
122
+ include ::Hyrax::BasicMetadata
123
+
124
+ validates :title, presence: { message: 'Your work must have a title.' }
125
+
126
+ # self.indexer = GenericWorkIndexer
127
+ end
@@ -0,0 +1,9 @@
1
+ development:
2
+ adapter: solr
3
+ url: <%= ENV['SOLR_URL'] %>/hyrax
4
+ test: &test
5
+ adapter: solr
6
+ url: <%= ENV['SOLR_URL'] %>/hyrax_test
7
+ production:
8
+ adapter: solr
9
+ url: <%= ENV['SOLR_URL'] || "http://127.0.0.1:8983/solr/blacklight-core" %>
@@ -0,0 +1,15 @@
1
+ development:
2
+ user: fedoraAdmin
3
+ password: fedoraAdmin
4
+ url: <%= ENV['FCREPO_URL'] %>/rest
5
+ base_path: /dev
6
+ test:
7
+ user: fedoraAdmin
8
+ password: fedoraAdmin
9
+ url: <%= ENV['FCREPO_URL'] %>/rest
10
+ base_path: /test
11
+ production:
12
+ user: fedoraAdmin
13
+ password: fedoraAdmin
14
+ url: http://127.0.0.1:8983/fedora/rest
15
+ base_path: /prod
@@ -0,0 +1,40 @@
1
+ # Test App Generator
2
+ require 'rails/generators'
3
+ require 'byebug'
4
+ class TestAppGenerator < Rails::Generators::Base
5
+ source_root File.expand_path('../../../spec/test_app_templates', __dir__)
6
+
7
+ def install_redis
8
+ gem 'redis', '4.8.0'
9
+ Bundler.with_unbundled_env do
10
+ run "bundle install"
11
+ end
12
+ end
13
+
14
+ def install_hyrax
15
+ generate 'hyrax:install', '-f'
16
+ end
17
+
18
+ # TODO: not sure why this doesn't work
19
+ # just copy them manually for the moment
20
+ def install_config_files
21
+ copy_file 'blacklight.yml', 'config/blacklight.yml'
22
+ copy_file 'fedora.yml', 'config/fedora.yml'
23
+ copy_file 'redis.yml', 'config/redis.yml'
24
+ copy_file 'solr.yml', 'config/solr.yml'
25
+ copy_file 'solr/conf/schema.xml', 'solr/conf/schema.xml'
26
+ copy_file 'solr/conf/solrconfig.xml', 'solr/conf/solrconfig.xml'
27
+ end
28
+
29
+ def install_engine
30
+ generate 'iiif_print:install'
31
+ end
32
+
33
+ def db_migrations
34
+ rake 'db:migrate'
35
+ end
36
+
37
+ def configure_browse_everything
38
+ generate 'browse_everything:config'
39
+ end
40
+ end
@@ -0,0 +1,9 @@
1
+ development:
2
+ host: <%= ENV['REDIS_HOST'] || 'localhost' %>
3
+ port: 6379
4
+ test:
5
+ host: <%= ENV['REDIS_HOST'] || 'localhost' %>
6
+ port: 6379
7
+ production:
8
+ host: <%= ENV['REDIS_HOST'] || 'localhost' %>
9
+ port: 6379
@@ -0,0 +1,362 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?>
2
+ <!--
3
+ Licensed to the Apache Software Foundation (ASF) under one or more
4
+ contributor license agreements. See the NOTICE file distributed with
5
+ this work for additional information regarding copyright ownership.
6
+ The ASF licenses this file to You under the Apache License, Version 2.0
7
+ (the "License"); you may not use this file except in compliance with
8
+ the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+ -->
18
+
19
+ <!--
20
+ This is the Solr schema file. This file should be named "schema.xml" and
21
+ should be in the conf directory under the solr home
22
+ (i.e. ./solr/conf/schema.xml by default)
23
+ or located where the classloader for the Solr webapp can find it.
24
+
25
+ This example schema is the recommended starting point for users.
26
+ It should be kept correct and concise, usable out-of-the-box.
27
+
28
+ For more information on how to customize this file, please see
29
+ http://wiki.apache.org/solr/SchemaXml
30
+
31
+ PERFORMANCE NOTE: this schema includes many optional features and should not
32
+ be used for benchmarking. To improve performance one could
33
+ - set stored="false" for all fields possible (esp large fields) when you
34
+ only need to search on the field but don't need to return the original
35
+ value.
36
+ - set indexed="false" if you don't need to search on the field, but only
37
+ return the field as a result of searching on other indexed fields.
38
+ - remove all unneeded copyField statements
39
+ - for best index size and searching performance, set "index" to false
40
+ for all general text fields, use copyField to copy them to the
41
+ catchall "text" field, and use that for searching.
42
+ - For maximum indexing performance, use the StreamingUpdateSolrServer
43
+ java client.
44
+ - Remember to run the JVM in server mode, and use a higher logging level
45
+ that avoids logging every request
46
+ -->
47
+
48
+ <schema name="Hydra Demo Index" version="1.5">
49
+ <!-- attribute "name" is the name of this schema and is only used for display purposes.
50
+ Applications should change this to reflect the nature of the search collection.
51
+ version="1.5" is Solr's version number for the schema syntax and semantics. It should
52
+ not normally be changed by applications.
53
+ 1.0: multiValued attribute did not exist, all fields are multiValued by nature
54
+ 1.1: multiValued attribute introduced, false by default
55
+ 1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields.
56
+ 1.3: removed optional field compress feature
57
+ 1.4: default auto-phrase (QueryParser feature) to off
58
+ -->
59
+
60
+ <types>
61
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
62
+ <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
63
+ <fieldType name="rand" class="solr.RandomSortField" omitNorms="true"/>
64
+
65
+ <!-- Default numeric field types. -->
66
+ <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
67
+ <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
68
+ <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
69
+ <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
70
+
71
+ <!-- trie numeric field types for faster range queries -->
72
+ <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
73
+ <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
74
+ <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
75
+ <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
76
+
77
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z
78
+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
79
+ -->
80
+ <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
81
+ <!-- A Trie based date field for faster date range queries and date faceting. -->
82
+ <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
83
+
84
+ <!-- This point type indexes the coordinates as separate fields (subFields)
85
+ If subFieldType is defined, it references a type, and a dynamic field
86
+ definition is created matching *___<typename>. Alternately, if
87
+ subFieldSuffix is defined, that is used to create the subFields.
88
+ Example: if subFieldType="double", then the coordinates would be
89
+ indexed in fields myloc_0___double,myloc_1___double.
90
+ Example: if subFieldSuffix="_d" then the coordinates would be indexed
91
+ in fields myloc_0_d,myloc_1_d
92
+ The subFields are an implementation detail of the fieldType, and end
93
+ users normally should not need to know about them.
94
+ -->
95
+ <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
96
+
97
+ <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
98
+ <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
99
+
100
+ <!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes.
101
+ For more information about this and other Spatial fields new to Solr 4, see:
102
+ http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
103
+ -->
104
+ <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
105
+ geo="true" distErrPct="0.025" maxDistErr="0.000009" distanceUnits="degrees" />
106
+
107
+ <fieldType name="text" class="solr.TextField" omitNorms="false">
108
+ <analyzer>
109
+ <tokenizer class="solr.ICUTokenizerFactory"/>
110
+ <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
111
+ <filter class="solr.TrimFilterFactory"/>
112
+ </analyzer>
113
+ </fieldType>
114
+
115
+ <!-- A text field that only splits on whitespace for exact matching of words -->
116
+ <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
117
+ <analyzer>
118
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
119
+ <filter class="solr.TrimFilterFactory"/>
120
+ </analyzer>
121
+ </fieldType>
122
+
123
+ <!-- single token analyzed text, for sorting. Punctuation is significant. -->
124
+ <fieldtype name="alphaSort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
125
+ <analyzer>
126
+ <tokenizer class="solr.KeywordTokenizerFactory" />
127
+ <filter class="solr.ICUFoldingFilterFactory"/>
128
+ <filter class="solr.TrimFilterFactory" />
129
+ </analyzer>
130
+ </fieldtype>
131
+
132
+ <!-- A text field with defaults appropriate for English -->
133
+ <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
134
+ <analyzer>
135
+ <tokenizer class="solr.ICUTokenizerFactory"/>
136
+ <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
137
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
138
+ <!-- EnglishMinimalStemFilterFactory is less aggressive than PorterStemFilterFactory: -->
139
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
140
+ <!--
141
+ <filter class="solr.PorterStemFilterFactory"/>
142
+ -->
143
+ <filter class="solr.TrimFilterFactory"/>
144
+ </analyzer>
145
+ </fieldType>
146
+
147
+ <!-- queries for paths match documents at that path, or in descendent paths -->
148
+ <fieldType name="descendent_path" class="solr.TextField">
149
+ <analyzer type="index">
150
+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
151
+ </analyzer>
152
+ <analyzer type="query">
153
+ <tokenizer class="solr.KeywordTokenizerFactory" />
154
+ </analyzer>
155
+ </fieldType>
156
+
157
+ <!-- queries for paths match documents at that path, or in ancestor paths -->
158
+ <fieldType name="ancestor_path" class="solr.TextField">
159
+ <analyzer type="index">
160
+ <tokenizer class="solr.KeywordTokenizerFactory" />
161
+ </analyzer>
162
+ <analyzer type="query">
163
+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
164
+ </analyzer>
165
+ </fieldType>
166
+
167
+ <fieldType class="solr.TextField" name="textSuggest" positionIncrementGap="100">
168
+ <analyzer>
169
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
170
+ <filter class="solr.LowerCaseFilterFactory"/>
171
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
172
+ </analyzer>
173
+ </fieldType>
174
+ </types>
175
+
176
+ <fields>
177
+ <!-- If you remove this field, you must _also_ disable the update log in solrconfig.xml
178
+ or Solr won't start. _version_ and update log are required for SolrCloud
179
+ -->
180
+ <field name="_version_" type="long" indexed="true" stored="true"/>
181
+
182
+ <field name="id" type="string" stored="true" indexed="true" multiValued="false" required="true"/>
183
+ <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
184
+
185
+ <field name="lat" type="tdouble" stored="true" indexed="true" multiValued="false"/>
186
+ <field name="lng" type="tdouble" stored="true" indexed="true" multiValued="false"/>
187
+
188
+ <!-- NOTE: not all possible Solr field types are represented in the dynamic fields -->
189
+
190
+ <!-- text (_t...) -->
191
+ <dynamicField name="*_ti" type="text" stored="false" indexed="true" multiValued="false"/>
192
+ <dynamicField name="*_tim" type="text" stored="false" indexed="true" multiValued="true"/>
193
+ <dynamicField name="*_ts" type="text" stored="true" indexed="false" multiValued="false"/>
194
+ <dynamicField name="*_tsm" type="text" stored="true" indexed="false" multiValued="true"/>
195
+ <dynamicField name="*_tsi" type="text" stored="true" indexed="true" multiValued="false"/>
196
+ <dynamicField name="*_tsim" type="text" stored="true" indexed="true" multiValued="true"/>
197
+ <dynamicField name="*_tiv" type="text" stored="false" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
198
+ <dynamicField name="*_timv" type="text" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
199
+ <dynamicField name="*_tsiv" type="text" stored="true" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
200
+ <dynamicField name="*_tsimv" type="text" stored="true" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
201
+
202
+ <!-- English text (_te...) -->
203
+ <dynamicField name="*_tei" type="text_en" stored="false" indexed="true" multiValued="false"/>
204
+ <dynamicField name="*_teim" type="text_en" stored="false" indexed="true" multiValued="true"/>
205
+ <dynamicField name="*_tes" type="text_en" stored="true" indexed="false" multiValued="false"/>
206
+ <dynamicField name="*_tesm" type="text_en" stored="true" indexed="false" multiValued="true"/>
207
+ <dynamicField name="*_tesi" type="text_en" stored="true" indexed="true" multiValued="false"/>
208
+ <dynamicField name="*_tesim" type="text_en" stored="true" indexed="true" multiValued="true"/>
209
+ <dynamicField name="*_teiv" type="text_en" stored="false" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
210
+ <dynamicField name="*_teimv" type="text_en" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
211
+ <dynamicField name="*_tesiv" type="text_en" stored="true" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
212
+ <dynamicField name="*_tesimv" type="text_en" stored="true" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
213
+
214
+ <!-- string (_s...) -->
215
+ <dynamicField name="*_si" type="string" stored="false" indexed="true" multiValued="false"/>
216
+ <dynamicField name="*_sim" type="string" stored="false" indexed="true" multiValued="true"/>
217
+ <dynamicField name="*_ss" type="string" stored="true" indexed="false" multiValued="false"/>
218
+ <dynamicField name="*_ssm" type="string" stored="true" indexed="false" multiValued="true"/>
219
+ <dynamicField name="*_ssi" type="string" stored="true" indexed="true" multiValued="false"/>
220
+ <dynamicField name="*_ssim" type="string" stored="true" indexed="true" multiValued="true"/>
221
+ <dynamicField name="*_ssort" type="alphaSort" stored="false" indexed="true" multiValued="false"/>
222
+
223
+ <!-- integer (_i...) -->
224
+ <dynamicField name="*_ii" type="int" stored="false" indexed="true" multiValued="false"/>
225
+ <dynamicField name="*_iim" type="int" stored="false" indexed="true" multiValued="true"/>
226
+ <dynamicField name="*_is" type="int" stored="true" indexed="false" multiValued="false"/>
227
+ <dynamicField name="*_ism" type="int" stored="true" indexed="false" multiValued="true"/>
228
+ <dynamicField name="*_isi" type="int" stored="true" indexed="true" multiValued="false"/>
229
+ <dynamicField name="*_isim" type="int" stored="true" indexed="true" multiValued="true"/>
230
+
231
+ <!-- trie integer (_it...) (for faster range queries) -->
232
+ <dynamicField name="*_iti" type="tint" stored="false" indexed="true" multiValued="false"/>
233
+ <dynamicField name="*_itim" type="tint" stored="false" indexed="true" multiValued="true"/>
234
+ <dynamicField name="*_its" type="tint" stored="true" indexed="false" multiValued="false"/>
235
+ <dynamicField name="*_itsm" type="tint" stored="true" indexed="false" multiValued="true"/>
236
+ <dynamicField name="*_itsi" type="tint" stored="true" indexed="true" multiValued="false"/>
237
+ <dynamicField name="*_itsim" type="tint" stored="true" indexed="true" multiValued="true"/>
238
+
239
+ <!-- date (_dt...) -->
240
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z
241
+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z -->
242
+ <dynamicField name="*_dti" type="date" stored="false" indexed="true" multiValued="false"/>
243
+ <dynamicField name="*_dtim" type="date" stored="false" indexed="true" multiValued="true"/>
244
+ <dynamicField name="*_dts" type="date" stored="true" indexed="false" multiValued="false"/>
245
+ <dynamicField name="*_dtsm" type="date" stored="true" indexed="false" multiValued="true"/>
246
+ <dynamicField name="*_dtsi" type="date" stored="true" indexed="true" multiValued="false"/>
247
+ <dynamicField name="*_dtsim" type="date" stored="true" indexed="true" multiValued="true"/>
248
+
249
+ <!-- trie date (_dtt...) (for faster range queries) -->
250
+ <dynamicField name="*_dtti" type="tdate" stored="false" indexed="true" multiValued="false"/>
251
+ <dynamicField name="*_dttim" type="tdate" stored="false" indexed="true" multiValued="true"/>
252
+ <dynamicField name="*_dtts" type="tdate" stored="true" indexed="false" multiValued="false"/>
253
+ <dynamicField name="*_dttsm" type="tdate" stored="true" indexed="false" multiValued="true"/>
254
+ <dynamicField name="*_dttsi" type="tdate" stored="true" indexed="true" multiValued="false"/>
255
+ <dynamicField name="*_dttsim" type="tdate" stored="true" indexed="true" multiValued="true"/>
256
+
257
+ <!-- long (_l...) -->
258
+ <dynamicField name="*_li" type="long" stored="false" indexed="true" multiValued="false"/>
259
+ <dynamicField name="*_lim" type="long" stored="false" indexed="true" multiValued="true"/>
260
+ <dynamicField name="*_ls" type="long" stored="true" indexed="false" multiValued="false"/>
261
+ <dynamicField name="*_lsm" type="long" stored="true" indexed="false" multiValued="true"/>
262
+ <dynamicField name="*_lsi" type="long" stored="true" indexed="true" multiValued="false"/>
263
+ <dynamicField name="*_lsim" type="long" stored="true" indexed="true" multiValued="true"/>
264
+
265
+ <!-- trie long (_lt...) (for faster range queries) -->
266
+ <dynamicField name="*_lti" type="tlong" stored="false" indexed="true" multiValued="false"/>
267
+ <dynamicField name="*_ltim" type="tlong" stored="false" indexed="true" multiValued="true"/>
268
+ <dynamicField name="*_lts" type="tlong" stored="true" indexed="false" multiValued="false"/>
269
+ <dynamicField name="*_ltsm" type="tlong" stored="true" indexed="false" multiValued="true"/>
270
+ <dynamicField name="*_ltsi" type="tlong" stored="true" indexed="true" multiValued="false"/>
271
+ <dynamicField name="*_ltsim" type="tlong" stored="true" indexed="true" multiValued="true"/>
272
+
273
+ <!-- double (_db...) -->
274
+ <dynamicField name="*_dbi" type="double" stored="false" indexed="true" multiValued="false"/>
275
+ <dynamicField name="*_dbim" type="double" stored="false" indexed="true" multiValued="true"/>
276
+ <dynamicField name="*_dbs" type="double" stored="true" indexed="false" multiValued="false"/>
277
+ <dynamicField name="*_dbsm" type="double" stored="true" indexed="false" multiValued="true"/>
278
+ <dynamicField name="*_dbsi" type="double" stored="true" indexed="true" multiValued="false"/>
279
+ <dynamicField name="*_dbsim" type="double" stored="true" indexed="true" multiValued="true"/>
280
+
281
+ <!-- trie double (_dbt...) (for faster range queries) -->
282
+ <dynamicField name="*_dbti" type="tdouble" stored="false" indexed="true" multiValued="false"/>
283
+ <dynamicField name="*_dbtim" type="tdouble" stored="false" indexed="true" multiValued="true"/>
284
+ <dynamicField name="*_dbts" type="tdouble" stored="true" indexed="false" multiValued="false"/>
285
+ <dynamicField name="*_dbtsm" type="tdouble" stored="true" indexed="false" multiValued="true"/>
286
+ <dynamicField name="*_dbtsi" type="tdouble" stored="true" indexed="true" multiValued="false"/>
287
+ <dynamicField name="*_dbtsim" type="tdouble" stored="true" indexed="true" multiValued="true"/>
288
+
289
+ <!-- float (_f...) -->
290
+ <dynamicField name="*_fi" type="float" stored="false" indexed="true" multiValued="false"/>
291
+ <dynamicField name="*_fim" type="float" stored="false" indexed="true" multiValued="true"/>
292
+ <dynamicField name="*_fs" type="float" stored="true" indexed="false" multiValued="false"/>
293
+ <dynamicField name="*_fsm" type="float" stored="true" indexed="false" multiValued="true"/>
294
+ <dynamicField name="*_fsi" type="float" stored="true" indexed="true" multiValued="false"/>
295
+ <dynamicField name="*_fsim" type="float" stored="true" indexed="true" multiValued="true"/>
296
+
297
+ <!-- trie float (_ft...) (for faster range queries) -->
298
+ <dynamicField name="*_fti" type="tfloat" stored="false" indexed="true" multiValued="false"/>
299
+ <dynamicField name="*_ftim" type="tfloat" stored="false" indexed="true" multiValued="true"/>
300
+ <dynamicField name="*_fts" type="tfloat" stored="true" indexed="false" multiValued="false"/>
301
+ <dynamicField name="*_ftsm" type="tfloat" stored="true" indexed="false" multiValued="true"/>
302
+ <dynamicField name="*_ftsi" type="tfloat" stored="true" indexed="true" multiValued="false"/>
303
+ <dynamicField name="*_ftsim" type="tfloat" stored="true" indexed="true" multiValued="true"/>
304
+
305
+ <!-- boolean (_b...) -->
306
+ <dynamicField name="*_bi" type="boolean" stored="false" indexed="true" multiValued="false"/>
307
+ <dynamicField name="*_bs" type="boolean" stored="true" indexed="false" multiValued="false"/>
308
+ <dynamicField name="*_bsi" type="boolean" stored="true" indexed="true" multiValued="false"/>
309
+
310
+ <!-- Type used to index the lat and lon components for the "location" FieldType -->
311
+ <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />
312
+
313
+ <!-- location (_ll...) -->
314
+ <dynamicField name="*_lli" type="location" stored="false" indexed="true" multiValued="false"/>
315
+ <dynamicField name="*_llim" type="location" stored="false" indexed="true" multiValued="true"/>
316
+ <dynamicField name="*_lls" type="location" stored="true" indexed="false" multiValued="false"/>
317
+ <dynamicField name="*_llsm" type="location" stored="true" indexed="false" multiValued="true"/>
318
+ <dynamicField name="*_llsi" type="location" stored="true" indexed="true" multiValued="false"/>
319
+ <dynamicField name="*_llsim" type="location" stored="true" indexed="true" multiValued="true"/>
320
+
321
+ <dynamicField name="*suggest" type="textSuggest" indexed="true" stored="false" multiValued="true" />
322
+
323
+ <!-- you must define copyField source and dest fields explicitly or schemaBrowser doesn't work -->
324
+ <field name="all_text_timv" type="text" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
325
+
326
+ </fields>
327
+
328
+ <!-- Field to use to determine and enforce document uniqueness.
329
+ Unless this field is marked with required="false", it will be a required field
330
+ -->
331
+ <uniqueKey>id</uniqueKey>
332
+
333
+ <!-- copyField commands copy one field to another at the time a document
334
+ is added to the index. It's used either to index the same field differently,
335
+ or to add multiple fields to the same field for easier/faster searching. -->
336
+ <!-- Copy Fields -->
337
+
338
+ <!-- Multiple source fields can be copied to one destination field by listing each source explicitly.
339
+ Another way to map multiple source fields to the same
340
+ destination field is to use the dynamic field syntax.
341
+ copyField also supports a maxChars to copy setting. -->
342
+
343
+ <!-- <copyField source="*_tesim" dest="all_text_timv" maxChars="3000"/> -->
344
+ <!-- for suggestions -->
345
+ <copyField source="*_tesim" dest="suggest"/>
346
+ <copyField source="*_ssim" dest="suggest"/>
347
+
348
+ <!-- Similarity is the scoring routine for each document vs. a query.
349
+ A custom similarity may be specified here, but the default is fine
350
+ for most applications. -->
351
+ <!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
352
+ <!-- ... OR ...
353
+ Specify a SimilarityFactory class name implementation
354
+ allowing parameters to be used.
355
+ -->
356
+ <!--
357
+ <similarity class="com.example.solr.CustomSimilarityFactory">
358
+ <str name="paramkey">param value</str>
359
+ </similarity>
360
+ -->
361
+
362
+ </schema>