iiif_print 1.0.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (181) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.github/PULL_REQUEST_TEMPLATE.md +16 -0
  4. data/.github/workflows/build-lint-test-action.yaml +4 -5
  5. data/.gitignore +5 -4
  6. data/.rubocop.yml +1 -0
  7. data/.solargraph.yml +19 -0
  8. data/Gemfile.lock +1025 -0
  9. data/README.md +102 -9
  10. data/Rakefile +6 -0
  11. data/app/actors/iiif_print/actors/cleanup_file_sets_actor_decorator.rb +24 -0
  12. data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +30 -28
  13. data/app/controllers/iiif_print/split_pdfs_controller.rb +38 -0
  14. data/app/helpers/iiif_print/iiif_helper_decorator.rb +32 -0
  15. data/app/helpers/iiif_print/iiif_print_helper_behavior.rb +23 -0
  16. data/app/helpers/iiif_print_helper.rb +0 -20
  17. data/app/indexers/concerns/iiif_print/child_work_indexer.rb +27 -0
  18. data/app/indexers/concerns/iiif_print/file_set_indexer.rb +45 -17
  19. data/{lib → app/jobs}/iiif_print/jobs/application_job.rb +2 -1
  20. data/app/jobs/iiif_print/jobs/child_works_from_pdf_job.rb +153 -0
  21. data/app/jobs/iiif_print/jobs/create_relationships_job.rb +117 -0
  22. data/app/jobs/iiif_print/jobs/request_split_pdf_job.rb +31 -0
  23. data/app/listeners/iiif_print/listener.rb +31 -0
  24. data/app/models/concerns/iiif_print/set_child_flag.rb +10 -1
  25. data/app/models/concerns/iiif_print/solr/document.rb +19 -3
  26. data/app/models/iiif_print/iiif_search_decorator.rb +35 -0
  27. data/app/models/iiif_print/iiif_search_response_decorator.rb +25 -2
  28. data/app/models/iiif_print/pending_relationship.rb +3 -0
  29. data/app/presenters/iiif_print/file_set_presenter_decorator.rb +11 -0
  30. data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +120 -0
  31. data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +1 -1
  32. data/app/presenters/iiif_print/work_show_presenter_decorator.rb +23 -11
  33. data/app/search_builders/concerns/iiif_print/allinson_flex_fields.rb +15 -0
  34. data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +2 -1
  35. data/app/services/iiif_print/derivative_rodeo_service.rb +382 -0
  36. data/app/services/iiif_print/manifest_builder_service_behavior.rb +90 -31
  37. data/app/services/iiif_print/pluggable_derivative_service.rb +8 -10
  38. data/app/services/iiif_print/simple_schema_loader_decorator.rb +11 -0
  39. data/app/transactions/hyrax/transactions/iiif_print_container_decorator.rb +34 -0
  40. data/app/transactions/hyrax/transactions/steps/conditionally_destroy_children_from_split.rb +32 -0
  41. data/app/transactions/hyrax/transactions/steps/delete_all_file_sets_decorator.rb +35 -0
  42. data/app/views/catalog/_index_header_list_default.html.erb +13 -0
  43. data/app/views/hyrax/base/_representative_media.html.erb +4 -3
  44. data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +1 -1
  45. data/app/views/hyrax/file_sets/_show_actions.html.erb +24 -0
  46. data/config/initializers/simple_schema_loader.rb +1 -0
  47. data/config/locales/iiif_print.en.yml +4 -0
  48. data/config/metadata/child_works_from_pdf_splitting.yaml +21 -0
  49. data/config/routes.rb +3 -0
  50. data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +8 -6
  51. data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +7 -5
  52. data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +8 -6
  53. data/db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb +7 -0
  54. data/docker-compose.yml +2 -2
  55. data/iiif_print.gemspec +11 -10
  56. data/lib/generators/iiif_print/install_generator.rb +21 -1
  57. data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +11 -4
  58. data/lib/generators/iiif_print/templates/helpers/iiif_print_helper.rb +5 -0
  59. data/lib/iiif_print/base_derivative_service.rb +14 -2
  60. data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +58 -6
  61. data/lib/iiif_print/catalog_search_builder.rb +7 -3
  62. data/lib/iiif_print/configuration.rb +205 -8
  63. data/lib/iiif_print/data/fileset_helper.rb +3 -3
  64. data/lib/iiif_print/data/work_derivatives.rb +4 -4
  65. data/lib/iiif_print/engine.rb +53 -15
  66. data/lib/iiif_print/errors.rb +18 -0
  67. data/lib/iiif_print/homepage_search_builder.rb +17 -0
  68. data/lib/iiif_print/image_tool.rb +12 -8
  69. data/lib/iiif_print/jp2_derivative_service.rb +4 -1
  70. data/lib/iiif_print/lineage_service.rb +47 -13
  71. data/lib/iiif_print/metadata.rb +67 -48
  72. data/lib/iiif_print/pdf_derivative_service.rb +3 -1
  73. data/lib/iiif_print/persistence_layer/active_fedora_adapter.rb +189 -0
  74. data/lib/iiif_print/persistence_layer/valkyrie_adapter.rb +183 -0
  75. data/lib/iiif_print/persistence_layer.rb +118 -0
  76. data/lib/iiif_print/split_pdfs/base_splitter.rb +153 -0
  77. data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +83 -37
  78. data/lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb +166 -0
  79. data/lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb +22 -0
  80. data/lib/iiif_print/split_pdfs/pages_to_jpgs_splitter.rb +19 -0
  81. data/lib/iiif_print/split_pdfs/pages_to_pngs_splitter.rb +26 -0
  82. data/lib/iiif_print/split_pdfs/pages_to_tiffs_splitter.rb +41 -0
  83. data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +64 -59
  84. data/lib/iiif_print/text_extraction/hocr_reader.rb +7 -3
  85. data/lib/iiif_print/text_extraction/page_ocr.rb +5 -4
  86. data/lib/iiif_print/text_extraction_derivative_service.rb +4 -2
  87. data/lib/iiif_print/text_formats_from_alto_service.rb +3 -1
  88. data/lib/iiif_print/tiff_derivative_service.rb +3 -1
  89. data/lib/iiif_print/version.rb +1 -1
  90. data/lib/iiif_print.rb +210 -20
  91. data/lib/samvera/derivatives/configuration.rb +83 -0
  92. data/lib/samvera/derivatives/hyrax.rb +129 -0
  93. data/lib/samvera/derivatives.rb +238 -0
  94. data/tasks/copy_authorities_to_test_app.rake +11 -0
  95. data/tasks/iiif_print_dev.rake +4 -4
  96. metadata +111 -196
  97. data/app/helpers/hyrax/iiif_helper.rb +0 -22
  98. data/app/indexers/concerns/iiif_print/child_indexer.rb +0 -34
  99. data/app/views/hyrax/file_sets/_actions.html.erb +0 -45
  100. data/bin/rails +0 -13
  101. data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +0 -107
  102. data/lib/iiif_print/jobs/create_relationships_job.rb +0 -78
  103. data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +0 -130
  104. data/spec/.keep.txt +0 -1
  105. data/spec/factories/ability.rb +0 -6
  106. data/spec/factories/newspaper_issue.rb +0 -7
  107. data/spec/factories/newspaper_page.rb +0 -7
  108. data/spec/factories/newspaper_page_solr_document.rb +0 -12
  109. data/spec/factories/newspaper_title.rb +0 -8
  110. data/spec/factories/uploaded_pdf_file.rb +0 -9
  111. data/spec/factories/uploaded_txt_file.rb +0 -9
  112. data/spec/factories/user.rb +0 -13
  113. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  114. data/spec/fixtures/files/4.1.07.tiff +0 -0
  115. data/spec/fixtures/files/README.md +0 -7
  116. data/spec/fixtures/files/alto-2-0.xsd +0 -714
  117. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  118. data/spec/fixtures/files/credits.md +0 -16
  119. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  120. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  121. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  122. data/spec/fixtures/files/minimal-alto.xml +0 -31
  123. data/spec/fixtures/files/ndnp-alto-sample.xml +0 -24
  124. data/spec/fixtures/files/ndnp-sample1-json.json +0 -1
  125. data/spec/fixtures/files/ndnp-sample1-txt.txt +0 -1
  126. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  127. data/spec/fixtures/files/ocr_alto.xml +0 -202
  128. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +0 -202
  129. data/spec/fixtures/files/ocr_color.tiff +0 -0
  130. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  131. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  132. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  133. data/spec/fixtures/files/ocr_mono_text_hocr.html +0 -78
  134. data/spec/fixtures/files/page1.tiff +0 -0
  135. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  136. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  137. data/spec/fixtures/files/thumbnail.jpg +0 -0
  138. data/spec/helpers/hyrax/iiif_helper_spec.rb +0 -65
  139. data/spec/helpers/iiif_print_helper_spec.rb +0 -43
  140. data/spec/iiif_print/base_derivative_service_spec.rb +0 -11
  141. data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +0 -51
  142. data/spec/iiif_print/catalog_search_builder_spec.rb +0 -60
  143. data/spec/iiif_print/configuration_spec.rb +0 -67
  144. data/spec/iiif_print/data/work_derivatives_spec.rb +0 -245
  145. data/spec/iiif_print/data/work_file_spec.rb +0 -99
  146. data/spec/iiif_print/data/work_files_spec.rb +0 -237
  147. data/spec/iiif_print/image_tool_spec.rb +0 -109
  148. data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +0 -30
  149. data/spec/iiif_print/jobs/create_relationships_job_spec.rb +0 -17
  150. data/spec/iiif_print/jp2_image_metadata_spec.rb +0 -37
  151. data/spec/iiif_print/lineage_service_spec.rb +0 -13
  152. data/spec/iiif_print/metadata_spec.rb +0 -115
  153. data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +0 -6
  154. data/spec/iiif_print/text_extraction/alto_reader_spec.rb +0 -49
  155. data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +0 -45
  156. data/spec/iiif_print/text_extraction/page_ocr_spec.rb +0 -84
  157. data/spec/iiif_print/text_extraction/render_alto_spec.rb +0 -54
  158. data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +0 -44
  159. data/spec/iiif_print_spec.rb +0 -51
  160. data/spec/misc_shared.rb +0 -111
  161. data/spec/models/iiif_print/derivative_attachment_spec.rb +0 -37
  162. data/spec/models/iiif_print/ingest_file_relation_spec.rb +0 -56
  163. data/spec/models/solr_document_spec.rb +0 -14
  164. data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +0 -19
  165. data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +0 -49
  166. data/spec/services/iiif_print/jp2_derivative_service_spec.rb +0 -59
  167. data/spec/services/iiif_print/pdf_derivative_service_spec.rb +0 -66
  168. data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +0 -178
  169. data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +0 -82
  170. data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +0 -127
  171. data/spec/services/iiif_print/tiff_derivative_service_spec.rb +0 -65
  172. data/spec/spec_helper.rb +0 -181
  173. data/spec/support/controller_level_helpers.rb +0 -28
  174. data/spec/support/iiif_print_models.rb +0 -127
  175. data/spec/test_app_templates/blacklight.yml +0 -9
  176. data/spec/test_app_templates/fedora.yml +0 -15
  177. data/spec/test_app_templates/lib/generators/test_app_generator.rb +0 -40
  178. data/spec/test_app_templates/redis.yml +0 -9
  179. data/spec/test_app_templates/solr/conf/schema.xml +0 -362
  180. data/spec/test_app_templates/solr/conf/solrconfig.xml +0 -322
  181. data/spec/test_app_templates/solr.yml +0 -7
@@ -1,127 +0,0 @@
1
- # frozen_string_literal: true
2
- ###################################################################################################
3
- #
4
- # The purpose of this file is to define the models we'll use in our spec application. Some of these
5
- # models are echoes of what downstream apps will define (e.g. FileSet). Others are for internal
6
- # modeling purposes only.
7
- #
8
- ####################################################################################################
9
- class FakeDerivativeService
10
- class_attribute :target_extension, default: 'txt'
11
- def initialize(target_extension: nil)
12
- self.target_extension = target_extension if target_extension
13
- @create_called = 0
14
- @cleanup_called = 0
15
- end
16
- attr_reader :create_called, :cleanup_called
17
-
18
- # Why the #new method?
19
- #
20
- # Because the plugin interface assumes we're passing a
21
- # plugin that responds to `new`. In prod code, that plugin is a class.
22
- # However, in test, to facilitate observing what methods are called we pass
23
- # the plugin as an instance of this class (e.g. `plugin =
24
- # FakeDerivativeService.new`). Later, in the process, the code calls
25
- # `plugin.new(file_set)`; it is then expected to return something that
26
- # responds to `create_derivatives` and `cleanup_derivatives`.
27
- #
28
- # @see IiifPrint::PluggableDerivativeService#initialize
29
- # @see IiifPrint::PluggableDerivativeService#services
30
- #
31
- # @note FakeDerivativeService.new returns an instance of
32
- # FakeDerivativeService. Likewise, FakeDerivativeService#new will now
33
- # return an instance of FakeDerivativeService
34
- def new(fileset)
35
- @fileset = fileset
36
- self
37
- end
38
-
39
- def valid?
40
- true
41
- end
42
-
43
- def create_derivatives(filename)
44
- @create_called += 1
45
- filename
46
- end
47
-
48
- def cleanup_derivatives
49
- @cleanup_called += 1
50
- end
51
- end
52
-
53
- ##
54
- # iiif_print requires a file set model that is compatible with Hyrax assumptions. We do not want to
55
- # add this to app/models because those are loaded in the downstream application; which can create
56
- class FileSet < ActiveFedora::Base
57
- include ::Hyrax::FileSetBehavior
58
- end
59
-
60
- class MyWork < ActiveFedora::Base
61
- include ::Hyrax::WorkBehavior
62
- end
63
-
64
- class MyWorkNeedsDerivative < ActiveFedora::Base
65
- attr_accessor :title
66
- def members
67
- []
68
- end
69
- end
70
-
71
- class MyWorkDoesNotNeedDerivative < ActiveFedora::Base
72
- attr_accessor :title
73
- def members
74
- []
75
- end
76
- end
77
-
78
- class MyIiifConfiguredWorkWithAllDerivativeServices < ActiveFedora::Base
79
- include IiifPrint.model_configuration
80
-
81
- attr_accessor :title
82
- def members
83
- []
84
- end
85
- end
86
-
87
- class MyIiifConfiguredWork < ActiveFedora::Base
88
- include IiifPrint.model_configuration(
89
- derivative_service_plugins: [FakeDerivativeService]
90
- )
91
- attr_accessor :title
92
- def members
93
- []
94
- end
95
- end
96
-
97
- # Newspaper Issue
98
- class NewspaperIssue < ActiveFedora::Base
99
- # WorkBehavior mixes in minimal ::Hyrax::CoreMetadata fields of
100
- # depositor, title, date_uploaded, and date_modified.
101
- # https://samvera.github.io/customize-metadata-model.html#core-metadata
102
- include ::Hyrax::WorkBehavior
103
- # BasicMetadata must be included last
104
- include ::Hyrax::BasicMetadata
105
- end
106
-
107
- # TODO: merge this in with whatever is needed from misc_shared.rb
108
- class WorkWithIiifPrintConfig < ActiveFedora::Base
109
- include ::Hyrax::WorkBehavior
110
- include IiifPrint::SetChildFlag
111
- include IiifPrint.model_configuration(pdf_split_child_model: WorkWithIiifPrintConfig)
112
- include ::Hyrax::BasicMetadata
113
-
114
- validates :title, presence: { message: 'Your work must have a title.' }
115
-
116
- # self.indexer = GenericWorkIndexer
117
- end
118
-
119
- class WorkWithOutConfig < ActiveFedora::Base
120
- include ::Hyrax::WorkBehavior
121
- include IiifPrint::SetChildFlag
122
- include ::Hyrax::BasicMetadata
123
-
124
- validates :title, presence: { message: 'Your work must have a title.' }
125
-
126
- # self.indexer = GenericWorkIndexer
127
- end
@@ -1,9 +0,0 @@
1
- development:
2
- adapter: solr
3
- url: <%= ENV['SOLR_URL'] %>/hyrax
4
- test: &test
5
- adapter: solr
6
- url: <%= ENV['SOLR_URL'] %>/hyrax_test
7
- production:
8
- adapter: solr
9
- url: <%= ENV['SOLR_URL'] || "http://127.0.0.1:8983/solr/blacklight-core" %>
@@ -1,15 +0,0 @@
1
- development:
2
- user: fedoraAdmin
3
- password: fedoraAdmin
4
- url: <%= ENV['FCREPO_URL'] %>/rest
5
- base_path: /dev
6
- test:
7
- user: fedoraAdmin
8
- password: fedoraAdmin
9
- url: <%= ENV['FCREPO_URL'] %>/rest
10
- base_path: /test
11
- production:
12
- user: fedoraAdmin
13
- password: fedoraAdmin
14
- url: http://127.0.0.1:8983/fedora/rest
15
- base_path: /prod
@@ -1,40 +0,0 @@
1
- # Test App Generator
2
- require 'rails/generators'
3
- require 'byebug'
4
- class TestAppGenerator < Rails::Generators::Base
5
- source_root File.expand_path('../../../spec/test_app_templates', __dir__)
6
-
7
- def install_redis
8
- gem 'redis', '4.8.0'
9
- Bundler.with_unbundled_env do
10
- run "bundle install"
11
- end
12
- end
13
-
14
- def install_hyrax
15
- generate 'hyrax:install', '-f'
16
- end
17
-
18
- # TODO not sure why this doesnt work
19
- # just copy them manually for the moment
20
- def install_config_files
21
- copy_file 'blacklight.yml', 'config/blacklight.yml'
22
- copy_file 'fedora.yml', 'config/fedora.yml'
23
- copy_file 'redis.yml', 'config/redis.yml'
24
- copy_file 'solr.yml', 'config/solr.yml'
25
- copy_file 'solr/conf/schema.xml', 'solr/conf/schema.xml'
26
- copy_file 'solr/conf/solrconfig.xml', 'solr/conf/solrconfig.xml'
27
- end
28
-
29
- def install_engine
30
- generate 'iiif_print:install'
31
- end
32
-
33
- def db_migrations
34
- rake 'db:migrate'
35
- end
36
-
37
- def configure_browse_everything
38
- generate 'browse_everything:config'
39
- end
40
- end
@@ -1,9 +0,0 @@
1
- development:
2
- host: <%= ENV['REDIS_HOST'] || 'localhost' %>
3
- port: 6379
4
- test:
5
- host: <%= ENV['REDIS_HOST'] || 'localhost' %>
6
- port: 6379
7
- production:
8
- host: <%= ENV['REDIS_HOST'] || 'localhost' %>
9
- port: 6379
@@ -1,362 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8" ?>
2
- <!--
3
- Licensed to the Apache Software Foundation (ASF) under one or more
4
- contributor license agreements. See the NOTICE file distributed with
5
- this work for additional information regarding copyright ownership.
6
- The ASF licenses this file to You under the Apache License, Version 2.0
7
- (the "License"); you may not use this file except in compliance with
8
- the License. You may obtain a copy of the License at
9
-
10
- http://www.apache.org/licenses/LICENSE-2.0
11
-
12
- Unless required by applicable law or agreed to in writing, software
13
- distributed under the License is distributed on an "AS IS" BASIS,
14
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
- See the License for the specific language governing permissions and
16
- limitations under the License.
17
- -->
18
-
19
- <!--
20
- This is the Solr schema file. This file should be named "schema.xml" and
21
- should be in the conf directory under the solr home
22
- (i.e. ./solr/conf/schema.xml by default)
23
- or located where the classloader for the Solr webapp can find it.
24
-
25
- This example schema is the recommended starting point for users.
26
- It should be kept correct and concise, usable out-of-the-box.
27
-
28
- For more information, on how to customize this file, please see
29
- http://wiki.apache.org/solr/SchemaXml
30
-
31
- PERFORMANCE NOTE: this schema includes many optional features and should not
32
- be used for benchmarking. To improve performance one could
33
- - set stored="false" for all fields possible (esp large fields) when you
34
- only need to search on the field but don't need to return the original
35
- value.
36
- - set indexed="false" if you don't need to search on the field, but only
37
- return the field as a result of searching on other indexed fields.
38
- - remove all unneeded copyField statements
39
- - for best index size and searching performance, set "index" to false
40
- for all general text fields, use copyField to copy them to the
41
- catchall "text" field, and use that for searching.
42
- - For maximum indexing performance, use the StreamingUpdateSolrServer
43
- java client.
44
- - Remember to run the JVM in server mode, and use a higher logging level
45
- that avoids logging every request
46
- -->
47
-
48
- <schema name="Hydra Demo Index" version="1.5">
49
- <!-- attribute "name" is the name of this schema and is only used for display purposes.
50
- Applications should change this to reflect the nature of the search collection.
51
- version="x.y" is Solr's version number for the schema syntax and semantics. It should
52
- not normally be changed by applications.
53
- 1.0: multiValued attribute did not exist, all fields are multiValued by nature
54
- 1.1: multiValued attribute introduced, false by default
55
- 1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields.
56
- 1.3: removed optional field compress feature
57
- 1.4: default auto-phrase (QueryParser feature) to off
58
- -->
59
-
60
- <types>
61
- <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
62
- <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
63
- <fieldType name="rand" class="solr.RandomSortField" omitNorms="true"/>
64
-
65
- <!-- Default numeric field types. -->
66
- <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
67
- <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
68
- <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
69
- <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
70
-
71
- <!-- trie numeric field types for faster range queries -->
72
- <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
73
- <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
74
- <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
75
- <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
76
-
77
- <!-- The format for this date field is of the form 1995-12-31T23:59:59Z
78
- Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
79
- -->
80
- <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
81
- <!-- A Trie based date field for faster date range queries and date faceting. -->
82
- <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
83
-
84
- <!-- This point type indexes the coordinates as separate fields (subFields)
85
- If subFieldType is defined, it references a type, and a dynamic field
86
- definition is created matching *___<typename>. Alternately, if
87
- subFieldSuffix is defined, that is used to create the subFields.
88
- Example: if subFieldType="double", then the coordinates would be
89
- indexed in fields myloc_0___double,myloc_1___double.
90
- Example: if subFieldSuffix="_d" then the coordinates would be indexed
91
- in fields myloc_0_d,myloc_1_d
92
- The subFields are an implementation detail of the fieldType, and end
93
- users normally should not need to know about them.
94
- -->
95
- <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
96
-
97
- <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
98
- <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
99
-
100
- <!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes.
101
- For more information about this and other Spatial fields new to Solr 4, see:
102
- http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
103
- -->
104
- <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
105
- geo="true" distErrPct="0.025" maxDistErr="0.000009" distanceUnits="degrees" />
106
-
107
- <fieldType name="text" class="solr.TextField" omitNorms="false">
108
- <analyzer>
109
- <tokenizer class="solr.ICUTokenizerFactory"/>
110
- <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
111
- <filter class="solr.TrimFilterFactory"/>
112
- </analyzer>
113
- </fieldType>
114
-
115
- <!-- A text field that only splits on whitespace for exact matching of words -->
116
- <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
117
- <analyzer>
118
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
119
- <filter class="solr.TrimFilterFactory"/>
120
- </analyzer>
121
- </fieldType>
122
-
123
- <!-- single token analyzed text, for sorting. Punctuation is significant. -->
124
- <fieldtype name="alphaSort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
125
- <analyzer>
126
- <tokenizer class="solr.KeywordTokenizerFactory" />
127
- <filter class="solr.ICUFoldingFilterFactory"/>
128
- <filter class="solr.TrimFilterFactory" />
129
- </analyzer>
130
- </fieldtype>
131
-
132
- <!-- A text field with defaults appropriate for English -->
133
- <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
134
- <analyzer>
135
- <tokenizer class="solr.ICUTokenizerFactory"/>
136
- <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
137
- <filter class="solr.EnglishPossessiveFilterFactory"/>
138
- <!-- EnglishMinimalStemFilterFactory is less aggressive than PorterStemFilterFactory: -->
139
- <filter class="solr.EnglishMinimalStemFilterFactory"/>
140
- <!--
141
- <filter class="solr.PorterStemFilterFactory"/>
142
- -->
143
- <filter class="solr.TrimFilterFactory"/>
144
- </analyzer>
145
- </fieldType>
146
-
147
- <!-- queries for paths match documents at that path, or in descendent paths -->
148
- <fieldType name="descendent_path" class="solr.TextField">
149
- <analyzer type="index">
150
- <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
151
- </analyzer>
152
- <analyzer type="query">
153
- <tokenizer class="solr.KeywordTokenizerFactory" />
154
- </analyzer>
155
- </fieldType>
156
-
157
- <!-- queries for paths match documents at that path, or in ancestor paths -->
158
- <fieldType name="ancestor_path" class="solr.TextField">
159
- <analyzer type="index">
160
- <tokenizer class="solr.KeywordTokenizerFactory" />
161
- </analyzer>
162
- <analyzer type="query">
163
- <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
164
- </analyzer>
165
- </fieldType>
166
-
167
- <fieldType class="solr.TextField" name="textSuggest" positionIncrementGap="100">
168
- <analyzer>
169
- <tokenizer class="solr.KeywordTokenizerFactory"/>
170
- <filter class="solr.LowerCaseFilterFactory"/>
171
- <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
172
- </analyzer>
173
- </fieldType>
174
- </types>
175
-
176
- <fields>
177
- <!-- If you remove this field, you must _also_ disable the update log in solrconfig.xml
178
- or Solr won't start. _version_ and update log are required for SolrCloud
179
- -->
180
- <field name="_version_" type="long" indexed="true" stored="true"/>
181
-
182
- <field name="id" type="string" stored="true" indexed="true" multiValued="false" required="true"/>
183
- <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
184
-
185
- <field name="lat" type="tdouble" stored="true" indexed="true" multiValued="false"/>
186
- <field name="lng" type="tdouble" stored="true" indexed="true" multiValued="false"/>
187
-
188
- <!-- NOTE: not all possible Solr field types are represented in the dynamic fields -->
189
-
190
- <!-- text (_t...) -->
191
- <dynamicField name="*_ti" type="text" stored="false" indexed="true" multiValued="false"/>
192
- <dynamicField name="*_tim" type="text" stored="false" indexed="true" multiValued="true"/>
193
- <dynamicField name="*_ts" type="text" stored="true" indexed="false" multiValued="false"/>
194
- <dynamicField name="*_tsm" type="text" stored="true" indexed="false" multiValued="true"/>
195
- <dynamicField name="*_tsi" type="text" stored="true" indexed="true" multiValued="false"/>
196
- <dynamicField name="*_tsim" type="text" stored="true" indexed="true" multiValued="true"/>
197
- <dynamicField name="*_tiv" type="text" stored="false" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
198
- <dynamicField name="*_timv" type="text" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
199
- <dynamicField name="*_tsiv" type="text" stored="true" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
200
- <dynamicField name="*_tsimv" type="text" stored="true" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
201
-
202
- <!-- English text (_te...) -->
203
- <dynamicField name="*_tei" type="text_en" stored="false" indexed="true" multiValued="false"/>
204
- <dynamicField name="*_teim" type="text_en" stored="false" indexed="true" multiValued="true"/>
205
- <dynamicField name="*_tes" type="text_en" stored="true" indexed="false" multiValued="false"/>
206
- <dynamicField name="*_tesm" type="text_en" stored="true" indexed="false" multiValued="true"/>
207
- <dynamicField name="*_tesi" type="text_en" stored="true" indexed="true" multiValued="false"/>
208
- <dynamicField name="*_tesim" type="text_en" stored="true" indexed="true" multiValued="true"/>
209
- <dynamicField name="*_teiv" type="text_en" stored="false" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
210
- <dynamicField name="*_teimv" type="text_en" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
211
- <dynamicField name="*_tesiv" type="text_en" stored="true" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
212
- <dynamicField name="*_tesimv" type="text_en" stored="true" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
213
-
214
- <!-- string (_s...) -->
215
- <dynamicField name="*_si" type="string" stored="false" indexed="true" multiValued="false"/>
216
- <dynamicField name="*_sim" type="string" stored="false" indexed="true" multiValued="true"/>
217
- <dynamicField name="*_ss" type="string" stored="true" indexed="false" multiValued="false"/>
218
- <dynamicField name="*_ssm" type="string" stored="true" indexed="false" multiValued="true"/>
219
- <dynamicField name="*_ssi" type="string" stored="true" indexed="true" multiValued="false"/>
220
- <dynamicField name="*_ssim" type="string" stored="true" indexed="true" multiValued="true"/>
221
- <dynamicField name="*_ssort" type="alphaSort" stored="false" indexed="true" multiValued="false"/>
222
-
223
- <!-- integer (_i...) -->
224
- <dynamicField name="*_ii" type="int" stored="false" indexed="true" multiValued="false"/>
225
- <dynamicField name="*_iim" type="int" stored="false" indexed="true" multiValued="true"/>
226
- <dynamicField name="*_is" type="int" stored="true" indexed="false" multiValued="false"/>
227
- <dynamicField name="*_ism" type="int" stored="true" indexed="false" multiValued="true"/>
228
- <dynamicField name="*_isi" type="int" stored="true" indexed="true" multiValued="false"/>
229
- <dynamicField name="*_isim" type="int" stored="true" indexed="true" multiValued="true"/>
230
-
231
- <!-- trie integer (_it...) (for faster range queries) -->
232
- <dynamicField name="*_iti" type="tint" stored="false" indexed="true" multiValued="false"/>
233
- <dynamicField name="*_itim" type="tint" stored="false" indexed="true" multiValued="true"/>
234
- <dynamicField name="*_its" type="tint" stored="true" indexed="false" multiValued="false"/>
235
- <dynamicField name="*_itsm" type="tint" stored="true" indexed="false" multiValued="true"/>
236
- <dynamicField name="*_itsi" type="tint" stored="true" indexed="true" multiValued="false"/>
237
- <dynamicField name="*_itsim" type="tint" stored="true" indexed="true" multiValued="true"/>
238
-
239
- <!-- date (_dt...) -->
240
- <!-- The format for this date field is of the form 1995-12-31T23:59:59Z
241
- Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z -->
242
- <dynamicField name="*_dti" type="date" stored="false" indexed="true" multiValued="false"/>
243
- <dynamicField name="*_dtim" type="date" stored="false" indexed="true" multiValued="true"/>
244
- <dynamicField name="*_dts" type="date" stored="true" indexed="false" multiValued="false"/>
245
- <dynamicField name="*_dtsm" type="date" stored="true" indexed="false" multiValued="true"/>
246
- <dynamicField name="*_dtsi" type="date" stored="true" indexed="true" multiValued="false"/>
247
- <dynamicField name="*_dtsim" type="date" stored="true" indexed="true" multiValued="true"/>
248
-
249
- <!-- trie date (_dtt...) (for faster range queries) -->
250
- <dynamicField name="*_dtti" type="tdate" stored="false" indexed="true" multiValued="false"/>
251
- <dynamicField name="*_dttim" type="tdate" stored="false" indexed="true" multiValued="true"/>
252
- <dynamicField name="*_dtts" type="tdate" stored="true" indexed="false" multiValued="false"/>
253
- <dynamicField name="*_dttsm" type="tdate" stored="true" indexed="false" multiValued="true"/>
254
- <dynamicField name="*_dttsi" type="tdate" stored="true" indexed="true" multiValued="false"/>
255
- <dynamicField name="*_dttsim" type="tdate" stored="true" indexed="true" multiValued="true"/>
256
-
257
- <!-- long (_l...) -->
258
- <dynamicField name="*_li" type="long" stored="false" indexed="true" multiValued="false"/>
259
- <dynamicField name="*_lim" type="long" stored="false" indexed="true" multiValued="true"/>
260
- <dynamicField name="*_ls" type="long" stored="true" indexed="false" multiValued="false"/>
261
- <dynamicField name="*_lsm" type="long" stored="true" indexed="false" multiValued="true"/>
262
- <dynamicField name="*_lsi" type="long" stored="true" indexed="true" multiValued="false"/>
263
- <dynamicField name="*_lsim" type="long" stored="true" indexed="true" multiValued="true"/>
264
-
265
- <!-- trie long (_lt...) (for faster range queries) -->
266
- <dynamicField name="*_lti" type="tlong" stored="false" indexed="true" multiValued="false"/>
267
- <dynamicField name="*_ltim" type="tlong" stored="false" indexed="true" multiValued="true"/>
268
- <dynamicField name="*_lts" type="tlong" stored="true" indexed="false" multiValued="false"/>
269
- <dynamicField name="*_ltsm" type="tlong" stored="true" indexed="false" multiValued="true"/>
270
- <dynamicField name="*_ltsi" type="tlong" stored="true" indexed="true" multiValued="false"/>
271
- <dynamicField name="*_ltsim" type="tlong" stored="true" indexed="true" multiValued="true"/>
272
-
273
- <!-- double (_db...) -->
274
- <dynamicField name="*_dbi" type="double" stored="false" indexed="true" multiValued="false"/>
275
- <dynamicField name="*_dbim" type="double" stored="false" indexed="true" multiValued="true"/>
276
- <dynamicField name="*_dbs" type="double" stored="true" indexed="false" multiValued="false"/>
277
- <dynamicField name="*_dbsm" type="double" stored="true" indexed="false" multiValued="true"/>
278
- <dynamicField name="*_dbsi" type="double" stored="true" indexed="true" multiValued="false"/>
279
- <dynamicField name="*_dbsim" type="double" stored="true" indexed="true" multiValued="true"/>
280
-
281
- <!-- trie double (_dbt...) (for faster range queries) -->
282
- <dynamicField name="*_dbti" type="tdouble" stored="false" indexed="true" multiValued="false"/>
283
- <dynamicField name="*_dbtim" type="tdouble" stored="false" indexed="true" multiValued="true"/>
284
- <dynamicField name="*_dbts" type="tdouble" stored="true" indexed="false" multiValued="false"/>
285
- <dynamicField name="*_dbtsm" type="tdouble" stored="true" indexed="false" multiValued="true"/>
286
- <dynamicField name="*_dbtsi" type="tdouble" stored="true" indexed="true" multiValued="false"/>
287
- <dynamicField name="*_dbtsim" type="tdouble" stored="true" indexed="true" multiValued="true"/>
288
-
289
- <!-- float (_f...) -->
290
- <dynamicField name="*_fi" type="float" stored="false" indexed="true" multiValued="false"/>
291
- <dynamicField name="*_fim" type="float" stored="false" indexed="true" multiValued="true"/>
292
- <dynamicField name="*_fs" type="float" stored="true" indexed="false" multiValued="false"/>
293
- <dynamicField name="*_fsm" type="float" stored="true" indexed="false" multiValued="true"/>
294
- <dynamicField name="*_fsi" type="float" stored="true" indexed="true" multiValued="false"/>
295
- <dynamicField name="*_fsim" type="float" stored="true" indexed="true" multiValued="true"/>
296
-
297
- <!-- trie float (_ft...) (for faster range queries) -->
298
- <dynamicField name="*_fti" type="tfloat" stored="false" indexed="true" multiValued="false"/>
299
- <dynamicField name="*_ftim" type="tfloat" stored="false" indexed="true" multiValued="true"/>
300
- <dynamicField name="*_fts" type="tfloat" stored="true" indexed="false" multiValued="false"/>
301
- <dynamicField name="*_ftsm" type="tfloat" stored="true" indexed="false" multiValued="true"/>
302
- <dynamicField name="*_ftsi" type="tfloat" stored="true" indexed="true" multiValued="false"/>
303
- <dynamicField name="*_ftsim" type="tfloat" stored="true" indexed="true" multiValued="true"/>
304
-
305
- <!-- boolean (_b...) -->
306
- <dynamicField name="*_bi" type="boolean" stored="false" indexed="true" multiValued="false"/>
307
- <dynamicField name="*_bs" type="boolean" stored="true" indexed="false" multiValued="false"/>
308
- <dynamicField name="*_bsi" type="boolean" stored="true" indexed="true" multiValued="false"/>
309
-
310
- <!-- Type used to index the lat and lon components for the "location" FieldType -->
311
- <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />
312
-
313
- <!-- location (_ll...) -->
314
- <dynamicField name="*_lli" type="location" stored="false" indexed="true" multiValued="false"/>
315
- <dynamicField name="*_llim" type="location" stored="false" indexed="true" multiValued="true"/>
316
- <dynamicField name="*_lls" type="location" stored="true" indexed="false" multiValued="false"/>
317
- <dynamicField name="*_llsm" type="location" stored="true" indexed="false" multiValued="true"/>
318
- <dynamicField name="*_llsi" type="location" stored="true" indexed="true" multiValued="false"/>
319
- <dynamicField name="*_llsim" type="location" stored="true" indexed="true" multiValued="true"/>
320
-
321
- <dynamicField name="*suggest" type="textSuggest" indexed="true" stored="false" multiValued="true" />
322
-
323
- <!-- you must define copyField source and dest fields explicitly or schemaBrowser doesn't work -->
324
- <field name="all_text_timv" type="text" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
325
-
326
- </fields>
327
-
328
- <!-- Field to use to determine and enforce document uniqueness.
329
- Unless this field is marked with required="false", it will be a required field
330
- -->
331
- <uniqueKey>id</uniqueKey>
332
-
333
- <!-- copyField commands copy one field to another at the time a document
334
- is added to the index. It's used either to index the same field differently,
335
- or to add multiple fields to the same field for easier/faster searching. -->
336
- <!-- Copy Fields -->
337
-
338
- <!-- Above, multiple source fields are copied to the [text] field.
339
- Another way to map multiple source fields to the same
340
- destination field is to use the dynamic field syntax.
341
- copyField also supports a maxChars to copy setting. -->
342
-
343
- <!-- <copyField source="*_tesim" dest="all_text_timv" maxChars="3000"/> -->
344
- <!-- for suggestions -->
345
- <copyField source="*_tesim" dest="suggest"/>
346
- <copyField source="*_ssim" dest="suggest"/>
347
-
348
- <!-- Similarity is the scoring routine for each document vs. a query.
349
- A custom similarity may be specified here, but the default is fine
350
- for most applications. -->
351
- <!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
352
- <!-- ... OR ...
353
- Specify a SimilarityFactory class name implementation
354
- allowing parameters to be used.
355
- -->
356
- <!--
357
- <similarity class="com.example.solr.CustomSimilarityFactory">
358
- <str name="paramkey">param value</str>
359
- </similarity>
360
- -->
361
-
362
- </schema>