iiif_print 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (211) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.env +5 -0
  4. data/.fcrepo_wrapper +4 -0
  5. data/.github/release.yml +20 -0
  6. data/.github/workflows/branches.yml +24 -0
  7. data/.github/workflows/build-lint-test-action.yaml +33 -0
  8. data/.github/workflows/release_labels.yml +25 -0
  9. data/.gitignore +52 -0
  10. data/.rubocop.yml +177 -0
  11. data/.solr_wrapper +8 -0
  12. data/.travis.yml +49 -0
  13. data/CONTRIBUTING.md +181 -0
  14. data/Dockerfile +15 -0
  15. data/Gemfile +52 -0
  16. data/LICENSE +203 -0
  17. data/README.md +203 -0
  18. data/Rakefile +38 -0
  19. data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +56 -0
  20. data/app/assets/config/iiif_print_manifest.js +2 -0
  21. data/app/assets/images/iiif_print/.keep +0 -0
  22. data/app/assets/javascripts/iiif_print/autocomplete_fix.js +33 -0
  23. data/app/assets/javascripts/iiif_print/ocr_search.js.erb +6 -0
  24. data/app/assets/javascripts/iiif_print.js +3 -0
  25. data/app/assets/stylesheets/iiif_print/_iiif_print.scss +4 -0
  26. data/app/assets/stylesheets/iiif_print/_issue_search.scss +13 -0
  27. data/app/assets/stylesheets/iiif_print/_issues_calendar.scss +18 -0
  28. data/app/assets/stylesheets/iiif_print/_newspapers_search.scss +38 -0
  29. data/app/assets/stylesheets/iiif_print/_search_results.scss +6 -0
  30. data/app/helpers/hyrax/iiif_helper.rb +22 -0
  31. data/app/helpers/iiif_print/application_helper.rb +5 -0
  32. data/app/helpers/iiif_print_helper.rb +64 -0
  33. data/app/indexers/concerns/iiif_print/child_indexer.rb +34 -0
  34. data/app/indexers/concerns/iiif_print/file_set_indexer.rb +29 -0
  35. data/app/mailers/iiif_print/application_mailer.rb +8 -0
  36. data/app/models/concerns/iiif_print/set_child_flag.rb +29 -0
  37. data/app/models/concerns/iiif_print/solr/document.rb +47 -0
  38. data/app/models/iiif_print/application_record.rb +6 -0
  39. data/app/models/iiif_print/derivative_attachment.rb +8 -0
  40. data/app/models/iiif_print/iiif_search_response_decorator.rb +17 -0
  41. data/app/models/iiif_print/ingest_file_relation.rb +14 -0
  42. data/app/models/iiif_print/pending_relationship.rb +7 -0
  43. data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +10 -0
  44. data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +33 -0
  45. data/app/presenters/iiif_print/work_show_presenter_decorator.rb +29 -0
  46. data/app/renderers/hyrax/renderers/faceted_attribute_renderer_decorator.rb +18 -0
  47. data/app/search_builders/concerns/iiif_print/exclude_models.rb +17 -0
  48. data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +14 -0
  49. data/app/services/iiif_print/manifest_builder_service_behavior.rb +97 -0
  50. data/app/services/iiif_print/pluggable_derivative_service.rb +120 -0
  51. data/app/views/catalog/_snippets_more.html.erb +16 -0
  52. data/app/views/hyrax/base/_representative_media.html.erb +9 -0
  53. data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +8 -0
  54. data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
  55. data/bin/rails +13 -0
  56. data/config/fcrepo_wrapper_test.yml +5 -0
  57. data/config/initializers/assets.rb +2 -0
  58. data/config/locales/iiif_print.de.yml +148 -0
  59. data/config/locales/iiif_print.en.yml +119 -0
  60. data/config/locales/iiif_print.es.yml +148 -0
  61. data/config/locales/iiif_print.fr.yml +149 -0
  62. data/config/locales/iiif_print.it.yml +142 -0
  63. data/config/locales/iiif_print.pt-BR.yml +148 -0
  64. data/config/locales/iiif_print.zh.yml +142 -0
  65. data/config/solr_wrapper_test.yml +9 -0
  66. data/config/test-fixture/solr-config/_rest_managed.json +3 -0
  67. data/config/test-fixture/solr-config/admin-extra.html +31 -0
  68. data/config/test-fixture/solr-config/elevate.xml +36 -0
  69. data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
  70. data/config/test-fixture/solr-config/protwords.txt +21 -0
  71. data/config/test-fixture/solr-config/schema.xml +366 -0
  72. data/config/test-fixture/solr-config/scripts.conf +24 -0
  73. data/config/test-fixture/solr-config/solrconfig.xml +322 -0
  74. data/config/test-fixture/solr-config/spellings.txt +2 -0
  75. data/config/test-fixture/solr-config/stopwords.txt +58 -0
  76. data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
  77. data/config/test-fixture/solr-config/synonyms.txt +31 -0
  78. data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
  79. data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
  80. data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
  81. data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
  82. data/config/vendor/fits.xml +55 -0
  83. data/config/vendor/imagemagick-6-policy.xml +76 -0
  84. data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +12 -0
  85. data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +11 -0
  86. data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +11 -0
  87. data/docker-compose.yml +129 -0
  88. data/iiif_print.gemspec +43 -0
  89. data/lib/generators/iiif_print/assets_generator.rb +29 -0
  90. data/lib/generators/iiif_print/catalog_controller_generator.rb +32 -0
  91. data/lib/generators/iiif_print/install_generator.rb +52 -0
  92. data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +22 -0
  93. data/lib/generators/iiif_print/templates/iiif_print.scss +1 -0
  94. data/lib/iiif_print/base_derivative_service.rb +113 -0
  95. data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +84 -0
  96. data/lib/iiif_print/catalog_search_builder.rb +31 -0
  97. data/lib/iiif_print/configuration.rb +99 -0
  98. data/lib/iiif_print/data/fileset_helper.rb +25 -0
  99. data/lib/iiif_print/data/path_helper.rb +40 -0
  100. data/lib/iiif_print/data/work_derivatives.rb +323 -0
  101. data/lib/iiif_print/data/work_file.rb +92 -0
  102. data/lib/iiif_print/data/work_files.rb +199 -0
  103. data/lib/iiif_print/data.rb +35 -0
  104. data/lib/iiif_print/engine.rb +77 -0
  105. data/lib/iiif_print/errors.rb +9 -0
  106. data/lib/iiif_print/image_tool.rb +119 -0
  107. data/lib/iiif_print/jobs/application_job.rb +8 -0
  108. data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +107 -0
  109. data/lib/iiif_print/jobs/create_relationships_job.rb +78 -0
  110. data/lib/iiif_print/jp2_derivative_service.rb +118 -0
  111. data/lib/iiif_print/jp2_image_metadata.rb +81 -0
  112. data/lib/iiif_print/lineage_service.rb +41 -0
  113. data/lib/iiif_print/metadata.rb +125 -0
  114. data/lib/iiif_print/pdf_derivative_service.rb +42 -0
  115. data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +75 -0
  116. data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +130 -0
  117. data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +85 -0
  118. data/lib/iiif_print/text_extraction/alto_reader.rb +123 -0
  119. data/lib/iiif_print/text_extraction/hocr_reader.rb +172 -0
  120. data/lib/iiif_print/text_extraction/page_ocr.rb +87 -0
  121. data/lib/iiif_print/text_extraction/render_alto.rb +84 -0
  122. data/lib/iiif_print/text_extraction/word_coords_builder.rb +38 -0
  123. data/lib/iiif_print/text_extraction.rb +11 -0
  124. data/lib/iiif_print/text_extraction_derivative_service.rb +47 -0
  125. data/lib/iiif_print/text_formats_from_alto_service.rb +77 -0
  126. data/lib/iiif_print/tiff_derivative_service.rb +50 -0
  127. data/lib/iiif_print/version.rb +3 -0
  128. data/lib/iiif_print/works_controller_behavior.rb +9 -0
  129. data/lib/iiif_print.rb +136 -0
  130. data/lib/tasks/set_child_works.rake +22 -0
  131. data/spec/.keep.txt +1 -0
  132. data/spec/factories/ability.rb +6 -0
  133. data/spec/factories/newspaper_issue.rb +7 -0
  134. data/spec/factories/newspaper_page.rb +7 -0
  135. data/spec/factories/newspaper_page_solr_document.rb +12 -0
  136. data/spec/factories/newspaper_title.rb +8 -0
  137. data/spec/factories/uploaded_pdf_file.rb +9 -0
  138. data/spec/factories/uploaded_txt_file.rb +9 -0
  139. data/spec/factories/user.rb +13 -0
  140. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  141. data/spec/fixtures/files/4.1.07.tiff +0 -0
  142. data/spec/fixtures/files/README.md +7 -0
  143. data/spec/fixtures/files/alto-2-0.xsd +714 -0
  144. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  145. data/spec/fixtures/files/credits.md +16 -0
  146. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  147. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  148. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  149. data/spec/fixtures/files/minimal-alto.xml +31 -0
  150. data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
  151. data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
  152. data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
  153. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  154. data/spec/fixtures/files/ocr_alto.xml +202 -0
  155. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
  156. data/spec/fixtures/files/ocr_color.tiff +0 -0
  157. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  158. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  159. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  160. data/spec/fixtures/files/ocr_mono_text_hocr.html +78 -0
  161. data/spec/fixtures/files/page1.tiff +0 -0
  162. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  163. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  164. data/spec/fixtures/files/thumbnail.jpg +0 -0
  165. data/spec/helpers/hyrax/iiif_helper_spec.rb +65 -0
  166. data/spec/helpers/iiif_print_helper_spec.rb +43 -0
  167. data/spec/iiif_print/base_derivative_service_spec.rb +11 -0
  168. data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +51 -0
  169. data/spec/iiif_print/catalog_search_builder_spec.rb +60 -0
  170. data/spec/iiif_print/configuration_spec.rb +67 -0
  171. data/spec/iiif_print/data/work_derivatives_spec.rb +245 -0
  172. data/spec/iiif_print/data/work_file_spec.rb +99 -0
  173. data/spec/iiif_print/data/work_files_spec.rb +237 -0
  174. data/spec/iiif_print/image_tool_spec.rb +109 -0
  175. data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +30 -0
  176. data/spec/iiif_print/jobs/create_relationships_job_spec.rb +17 -0
  177. data/spec/iiif_print/jp2_image_metadata_spec.rb +37 -0
  178. data/spec/iiif_print/lineage_service_spec.rb +13 -0
  179. data/spec/iiif_print/metadata_spec.rb +115 -0
  180. data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +6 -0
  181. data/spec/iiif_print/text_extraction/alto_reader_spec.rb +49 -0
  182. data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +45 -0
  183. data/spec/iiif_print/text_extraction/page_ocr_spec.rb +84 -0
  184. data/spec/iiif_print/text_extraction/render_alto_spec.rb +54 -0
  185. data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +44 -0
  186. data/spec/iiif_print_spec.rb +51 -0
  187. data/spec/misc_shared.rb +111 -0
  188. data/spec/models/iiif_print/derivative_attachment_spec.rb +37 -0
  189. data/spec/models/iiif_print/ingest_file_relation_spec.rb +56 -0
  190. data/spec/models/solr_document_spec.rb +14 -0
  191. data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +19 -0
  192. data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +49 -0
  193. data/spec/services/iiif_print/jp2_derivative_service_spec.rb +59 -0
  194. data/spec/services/iiif_print/pdf_derivative_service_spec.rb +66 -0
  195. data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +178 -0
  196. data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +82 -0
  197. data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +127 -0
  198. data/spec/services/iiif_print/tiff_derivative_service_spec.rb +65 -0
  199. data/spec/spec_helper.rb +181 -0
  200. data/spec/support/controller_level_helpers.rb +28 -0
  201. data/spec/support/iiif_print_models.rb +127 -0
  202. data/spec/test_app_templates/blacklight.yml +9 -0
  203. data/spec/test_app_templates/fedora.yml +15 -0
  204. data/spec/test_app_templates/lib/generators/test_app_generator.rb +40 -0
  205. data/spec/test_app_templates/redis.yml +9 -0
  206. data/spec/test_app_templates/solr/conf/schema.xml +362 -0
  207. data/spec/test_app_templates/solr/conf/solrconfig.xml +322 -0
  208. data/spec/test_app_templates/solr.yml +7 -0
  209. data/tasks/iiif_print_dev.rake +34 -0
  210. data/tmp/.keep +0 -0
  211. metadata +605 -0
@@ -0,0 +1,246 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ # Syntax:
14
+ # "source" => "target"
15
+ # "source".length() > 0 (source cannot be empty.)
16
+ # "target".length() >= 0 (target can be empty.)
17
+
18
+ # example:
19
+ # "À" => "A"
20
+ # "\u00C0" => "A"
21
+ # "\u00C0" => "\u0041"
22
+ # "ß" => "ss"
23
+ # "\t" => " "
24
+ # "\n" => ""
25
+
26
+ # À => A
27
+ "\u00C0" => "A"
28
+
29
+ # Á => A
30
+ "\u00C1" => "A"
31
+
32
+ # Â => A
33
+ "\u00C2" => "A"
34
+
35
+ # Ã => A
36
+ "\u00C3" => "A"
37
+
38
+ # Ä => A
39
+ "\u00C4" => "A"
40
+
41
+ # Å => A
42
+ "\u00C5" => "A"
43
+
44
+ # Æ => AE
45
+ "\u00C6" => "AE"
46
+
47
+ # Ç => C
48
+ "\u00C7" => "C"
49
+
50
+ # È => E
51
+ "\u00C8" => "E"
52
+
53
+ # É => E
54
+ "\u00C9" => "E"
55
+
56
+ # Ê => E
57
+ "\u00CA" => "E"
58
+
59
+ # Ë => E
60
+ "\u00CB" => "E"
61
+
62
+ # Ì => I
63
+ "\u00CC" => "I"
64
+
65
+ # Í => I
66
+ "\u00CD" => "I"
67
+
68
+ # Î => I
69
+ "\u00CE" => "I"
70
+
71
+ # Ï => I
72
+ "\u00CF" => "I"
73
+
74
+ # Ĳ => IJ
75
+ "\u0132" => "IJ"
76
+
77
+ # Ð => D
78
+ "\u00D0" => "D"
79
+
80
+ # Ñ => N
81
+ "\u00D1" => "N"
82
+
83
+ # Ò => O
84
+ "\u00D2" => "O"
85
+
86
+ # Ó => O
87
+ "\u00D3" => "O"
88
+
89
+ # Ô => O
90
+ "\u00D4" => "O"
91
+
92
+ # Õ => O
93
+ "\u00D5" => "O"
94
+
95
+ # Ö => O
96
+ "\u00D6" => "O"
97
+
98
+ # Ø => O
99
+ "\u00D8" => "O"
100
+
101
+ # Œ => OE
102
+ "\u0152" => "OE"
103
+
104
+ # Þ => TH
105
+ "\u00DE" => "TH"
106
+
107
+ # Ù => U
108
+ "\u00D9" => "U"
109
+
110
+ # Ú => U
111
+ "\u00DA" => "U"
112
+
113
+ # Û => U
114
+ "\u00DB" => "U"
115
+
116
+ # Ü => U
117
+ "\u00DC" => "U"
118
+
119
+ # Ý => Y
120
+ "\u00DD" => "Y"
121
+
122
+ # Ÿ => Y
123
+ "\u0178" => "Y"
124
+
125
+ # à => a
126
+ "\u00E0" => "a"
127
+
128
+ # á => a
129
+ "\u00E1" => "a"
130
+
131
+ # â => a
132
+ "\u00E2" => "a"
133
+
134
+ # ã => a
135
+ "\u00E3" => "a"
136
+
137
+ # ä => a
138
+ "\u00E4" => "a"
139
+
140
+ # å => a
141
+ "\u00E5" => "a"
142
+
143
+ # æ => ae
144
+ "\u00E6" => "ae"
145
+
146
+ # ç => c
147
+ "\u00E7" => "c"
148
+
149
+ # è => e
150
+ "\u00E8" => "e"
151
+
152
+ # é => e
153
+ "\u00E9" => "e"
154
+
155
+ # ê => e
156
+ "\u00EA" => "e"
157
+
158
+ # ë => e
159
+ "\u00EB" => "e"
160
+
161
+ # ì => i
162
+ "\u00EC" => "i"
163
+
164
+ # í => i
165
+ "\u00ED" => "i"
166
+
167
+ # î => i
168
+ "\u00EE" => "i"
169
+
170
+ # ï => i
171
+ "\u00EF" => "i"
172
+
173
+ # ĳ => ij
174
+ "\u0133" => "ij"
175
+
176
+ # ð => d
177
+ "\u00F0" => "d"
178
+
179
+ # ñ => n
180
+ "\u00F1" => "n"
181
+
182
+ # ò => o
183
+ "\u00F2" => "o"
184
+
185
+ # ó => o
186
+ "\u00F3" => "o"
187
+
188
+ # ô => o
189
+ "\u00F4" => "o"
190
+
191
+ # õ => o
192
+ "\u00F5" => "o"
193
+
194
+ # ö => o
195
+ "\u00F6" => "o"
196
+
197
+ # ø => o
198
+ "\u00F8" => "o"
199
+
200
+ # œ => oe
201
+ "\u0153" => "oe"
202
+
203
+ # ß => ss
204
+ "\u00DF" => "ss"
205
+
206
+ # þ => th
207
+ "\u00FE" => "th"
208
+
209
+ # ù => u
210
+ "\u00F9" => "u"
211
+
212
+ # ú => u
213
+ "\u00FA" => "u"
214
+
215
+ # û => u
216
+ "\u00FB" => "u"
217
+
218
+ # ü => u
219
+ "\u00FC" => "u"
220
+
221
+ # ý => y
222
+ "\u00FD" => "y"
223
+
224
+ # ÿ => y
225
+ "\u00FF" => "y"
226
+
227
+ # ﬀ => ff
228
+ "\uFB00" => "ff"
229
+
230
+ # ﬁ => fi
231
+ "\uFB01" => "fi"
232
+
233
+ # ﬂ => fl
234
+ "\uFB02" => "fl"
235
+
236
+ # ﬃ => ffi
237
+ "\uFB03" => "ffi"
238
+
239
+ # ﬄ => ffl
240
+ "\uFB04" => "ffl"
241
+
242
+ # ﬅ => ft
243
+ "\uFB05" => "ft"
244
+
245
+ # ﬆ => st
246
+ "\uFB06" => "st"
@@ -0,0 +1,21 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ #-----------------------------------------------------------------------
14
+ # Use a protected word file to protect against the stemmer reducing two
15
+ # unrelated words to the same base word.
16
+
17
+ # Some non-words that normally won't be encountered,
18
+ # just to test that they won't be stemmed.
19
+ dontstems
20
+ zwhacky
21
+
@@ -0,0 +1,366 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?>
2
+ <!--
3
+ Licensed to the Apache Software Foundation (ASF) under one or more
4
+ contributor license agreements. See the NOTICE file distributed with
5
+ this work for additional information regarding copyright ownership.
6
+ The ASF licenses this file to You under the Apache License, Version 2.0
7
+ (the "License"); you may not use this file except in compliance with
8
+ the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+ -->
18
+
19
+ <!--
20
+ This is the Solr schema file. This file should be named "schema.xml" and
21
+ should be in the conf directory under the solr home
22
+ (i.e. ./solr/conf/schema.xml by default)
23
+ or located where the classloader for the Solr webapp can find it.
24
+
25
+ This example schema is the recommended starting point for users.
26
+ It should be kept correct and concise, usable out-of-the-box.
27
+
28
+ For more information on how to customize this file, please see
29
+ http://wiki.apache.org/solr/SchemaXml
30
+
31
+ PERFORMANCE NOTE: this schema includes many optional features and should not
32
+ be used for benchmarking. To improve performance one could
33
+ - set stored="false" for all fields possible (esp large fields) when you
34
+ only need to search on the field but don't need to return the original
35
+ value.
36
+ - set indexed="false" if you don't need to search on the field, but only
37
+ return the field as a result of searching on other indexed fields.
38
+ - remove all unneeded copyField statements
39
+ - for best index size and searching performance, set indexed="false"
40
+ for all general text fields, use copyField to copy them to the
41
+ catchall "text" field, and use that for searching.
42
+ - For maximum indexing performance, use the StreamingUpdateSolrServer
43
+ java client.
44
+ - Remember to run the JVM in server mode, and use a higher logging level
45
+ that avoids logging every request
46
+ -->
47
+
48
+ <schema name="Hydra Demo Index" version="1.5">
49
+ <!-- attribute "name" is the name of this schema and is only used for display purposes.
50
+ Applications should change this to reflect the nature of the search collection.
51
+ version="1.5" is Solr's version number for the schema syntax and semantics. It should
52
+ not normally be changed by applications.
53
+ 1.0: multiValued attribute did not exist, all fields are multiValued by nature
54
+ 1.1: multiValued attribute introduced, false by default
55
+ 1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields.
56
+ 1.3: removed optional field compress feature
57
+ 1.4: default auto-phrase (QueryParser feature) to off
58
+ -->
59
+
60
+ <types>
61
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
62
+ <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
63
+ <fieldType name="rand" class="solr.RandomSortField" omitNorms="true"/>
64
+
65
+ <!-- Default numeric field types. -->
66
+ <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
67
+ <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
68
+ <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
69
+ <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
70
+
71
+ <!-- trie numeric field types for faster range queries -->
72
+ <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
73
+ <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
74
+ <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
75
+ <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
76
+
77
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z
78
+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
79
+ -->
80
+ <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
81
+ <!-- A Trie based date field for faster date range queries and date faceting. -->
82
+ <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
83
+
84
+
85
+ <!-- This point type indexes the coordinates as separate fields (subFields)
86
+ If subFieldType is defined, it references a type, and a dynamic field
87
+ definition is created matching *___<typename>. Alternately, if
88
+ subFieldSuffix is defined, that is used to create the subFields.
89
+ Example: if subFieldType="double", then the coordinates would be
90
+ indexed in fields myloc_0___double,myloc_1___double.
91
+ Example: if subFieldSuffix="_d" then the coordinates would be indexed
92
+ in fields myloc_0_d,myloc_1_d
93
+ The subFields are an implementation detail of the fieldType, and end
94
+ users normally should not need to know about them.
95
+ -->
96
+ <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
97
+
98
+ <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
99
+ <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
100
+
101
+ <!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes.
102
+ For more information about this and other Spatial fields new to Solr 4, see:
103
+ http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
104
+ -->
105
+ <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
106
+ geo="true" distErrPct="0.025" maxDistErr="0.000009" distanceUnits="degrees" />
107
+
108
+ <fieldType name="text" class="solr.TextField" omitNorms="false">
109
+ <analyzer>
110
+ <tokenizer class="solr.ICUTokenizerFactory"/>
111
+ <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
112
+ <filter class="solr.TrimFilterFactory"/>
113
+ </analyzer>
114
+ </fieldType>
115
+
116
+ <!-- A text field that only splits on whitespace for exact matching of words -->
117
+ <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
118
+ <analyzer>
119
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
120
+ <filter class="solr.TrimFilterFactory"/>
121
+ </analyzer>
122
+ </fieldType>
123
+
124
+ <!-- single token analyzed text, for sorting. Punctuation is significant. -->
125
+ <fieldtype name="alphaSort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
126
+ <analyzer>
127
+ <tokenizer class="solr.KeywordTokenizerFactory" />
128
+ <filter class="solr.ICUFoldingFilterFactory"/>
129
+ <filter class="solr.TrimFilterFactory" />
130
+ </analyzer>
131
+ </fieldtype>
132
+
133
+ <!-- A text field with defaults appropriate for English -->
134
+ <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
135
+ <analyzer>
136
+ <tokenizer class="solr.ICUTokenizerFactory"/>
137
+ <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
138
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
139
+ <!-- EnglishMinimalStemFilterFactory is less aggressive than PorterStemFilterFactory: -->
140
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
141
+ <!--
142
+ <filter class="solr.PorterStemFilterFactory"/>
143
+ -->
144
+ <filter class="solr.TrimFilterFactory"/>
145
+ </analyzer>
146
+ </fieldType>
147
+
148
+ <!-- queries for paths match documents at that path, or in descendent paths -->
149
+ <fieldType name="descendent_path" class="solr.TextField">
150
+ <analyzer type="index">
151
+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
152
+ </analyzer>
153
+ <analyzer type="query">
154
+ <tokenizer class="solr.KeywordTokenizerFactory" />
155
+ </analyzer>
156
+ </fieldType>
157
+
158
+ <!-- queries for paths match documents at that path, or in ancestor paths -->
159
+ <fieldType name="ancestor_path" class="solr.TextField">
160
+ <analyzer type="index">
161
+ <tokenizer class="solr.KeywordTokenizerFactory" />
162
+ </analyzer>
163
+ <analyzer type="query">
164
+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
165
+ </analyzer>
166
+ </fieldType>
167
+
168
+ <fieldType class="solr.TextField" name="textSuggest" positionIncrementGap="100">
169
+ <analyzer>
170
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
171
+ <filter class="solr.StandardFilterFactory"/>
172
+ <filter class="solr.LowerCaseFilterFactory"/>
173
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
174
+ </analyzer>
175
+ </fieldType>
176
+ </types>
177
+
178
+
179
+ <fields>
180
+ <!-- If you remove this field, you must _also_ disable the update log in solrconfig.xml
181
+ or Solr won't start. _version_ and update log are required for SolrCloud
182
+ -->
183
+ <field name="_version_" type="long" indexed="true" stored="true"/>
184
+
185
+ <field name="id" type="string" stored="true" indexed="true" multiValued="false" required="true"/>
186
+ <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
187
+
188
+ <field name="lat" type="tdouble" stored="true" indexed="true" multiValued="false"/>
189
+ <field name="lng" type="tdouble" stored="true" indexed="true" multiValued="false"/>
190
+
191
+ <!-- NOTE: not all possible Solr field types are represented in the dynamic fields -->
192
+
193
+ <!-- text (_t...) -->
194
+ <dynamicField name="*_ti" type="text" stored="false" indexed="true" multiValued="false"/>
195
+ <dynamicField name="*_tim" type="text" stored="false" indexed="true" multiValued="true"/>
196
+ <dynamicField name="*_ts" type="text" stored="true" indexed="false" multiValued="false"/>
197
+ <dynamicField name="*_tsm" type="text" stored="true" indexed="false" multiValued="true"/>
198
+ <dynamicField name="*_tsi" type="text" stored="true" indexed="true" multiValued="false"/>
199
+ <dynamicField name="*_tsim" type="text" stored="true" indexed="true" multiValued="true"/>
200
+ <dynamicField name="*_tiv" type="text" stored="false" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
201
+ <dynamicField name="*_timv" type="text" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
202
+ <dynamicField name="*_tsiv" type="text" stored="true" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
203
+ <dynamicField name="*_tsimv" type="text" stored="true" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
204
+
205
+ <!-- English text (_te...) -->
206
+ <dynamicField name="*_tei" type="text_en" stored="false" indexed="true" multiValued="false"/>
207
+ <dynamicField name="*_teim" type="text_en" stored="false" indexed="true" multiValued="true"/>
208
+ <dynamicField name="*_tes" type="text_en" stored="true" indexed="false" multiValued="false"/>
209
+ <dynamicField name="*_tesm" type="text_en" stored="true" indexed="false" multiValued="true"/>
210
+ <dynamicField name="*_tesi" type="text_en" stored="true" indexed="true" multiValued="false"/>
211
+ <dynamicField name="*_tesim" type="text_en" stored="true" indexed="true" multiValued="true"/>
212
+ <dynamicField name="*_teiv" type="text_en" stored="false" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
213
+ <dynamicField name="*_teimv" type="text_en" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
214
+ <dynamicField name="*_tesiv" type="text_en" stored="true" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
215
+ <dynamicField name="*_tesimv" type="text_en" stored="true" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
216
+
217
+ <!-- string (_s...) -->
218
+ <dynamicField name="*_si" type="string" stored="false" indexed="true" multiValued="false"/>
219
+ <dynamicField name="*_sim" type="string" stored="false" indexed="true" multiValued="true"/>
220
+ <dynamicField name="*_ss" type="string" stored="true" indexed="false" multiValued="false"/>
221
+ <dynamicField name="*_ssm" type="string" stored="true" indexed="false" multiValued="true"/>
222
+ <dynamicField name="*_ssi" type="string" stored="true" indexed="true" multiValued="false"/>
223
+ <dynamicField name="*_ssim" type="string" stored="true" indexed="true" multiValued="true"/>
224
+ <dynamicField name="*_ssort" type="alphaSort" stored="false" indexed="true" multiValued="false"/>
225
+
226
+ <!-- integer (_i...) -->
227
+ <dynamicField name="*_ii" type="int" stored="false" indexed="true" multiValued="false"/>
228
+ <dynamicField name="*_iim" type="int" stored="false" indexed="true" multiValued="true"/>
229
+ <dynamicField name="*_is" type="int" stored="true" indexed="false" multiValued="false"/>
230
+ <dynamicField name="*_ism" type="int" stored="true" indexed="false" multiValued="true"/>
231
+ <dynamicField name="*_isi" type="int" stored="true" indexed="true" multiValued="false"/>
232
+ <dynamicField name="*_isim" type="int" stored="true" indexed="true" multiValued="true"/>
233
+
234
+ <!-- trie integer (_it...) (for faster range queries) -->
235
+ <dynamicField name="*_iti" type="tint" stored="false" indexed="true" multiValued="false"/>
236
+ <dynamicField name="*_itim" type="tint" stored="false" indexed="true" multiValued="true"/>
237
+ <dynamicField name="*_its" type="tint" stored="true" indexed="false" multiValued="false"/>
238
+ <dynamicField name="*_itsm" type="tint" stored="true" indexed="false" multiValued="true"/>
239
+ <dynamicField name="*_itsi" type="tint" stored="true" indexed="true" multiValued="false"/>
240
+ <dynamicField name="*_itsim" type="tint" stored="true" indexed="true" multiValued="true"/>
241
+
242
+ <!-- date (_dt...) -->
243
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z
244
+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z -->
245
+ <dynamicField name="*_dti" type="date" stored="false" indexed="true" multiValued="false"/>
246
+ <dynamicField name="*_dtim" type="date" stored="false" indexed="true" multiValued="true"/>
247
+ <dynamicField name="*_dts" type="date" stored="true" indexed="false" multiValued="false"/>
248
+ <dynamicField name="*_dtsm" type="date" stored="true" indexed="false" multiValued="true"/>
249
+ <dynamicField name="*_dtsi" type="date" stored="true" indexed="true" multiValued="false"/>
250
+ <dynamicField name="*_dtsim" type="date" stored="true" indexed="true" multiValued="true"/>
251
+
252
+ <!-- trie date (_dtt...) (for faster range queries) -->
253
+ <dynamicField name="*_dtti" type="tdate" stored="false" indexed="true" multiValued="false"/>
254
+ <dynamicField name="*_dttim" type="tdate" stored="false" indexed="true" multiValued="true"/>
255
+ <dynamicField name="*_dtts" type="tdate" stored="true" indexed="false" multiValued="false"/>
256
+ <dynamicField name="*_dttsm" type="tdate" stored="true" indexed="false" multiValued="true"/>
257
+ <dynamicField name="*_dttsi" type="tdate" stored="true" indexed="true" multiValued="false"/>
258
+ <dynamicField name="*_dttsim" type="tdate" stored="true" indexed="true" multiValued="true"/>
259
+
260
+ <!-- long (_l...) -->
261
+ <dynamicField name="*_li" type="long" stored="false" indexed="true" multiValued="false"/>
262
+ <dynamicField name="*_lim" type="long" stored="false" indexed="true" multiValued="true"/>
263
+ <dynamicField name="*_ls" type="long" stored="true" indexed="false" multiValued="false"/>
264
+ <dynamicField name="*_lsm" type="long" stored="true" indexed="false" multiValued="true"/>
265
+ <dynamicField name="*_lsi" type="long" stored="true" indexed="true" multiValued="false"/>
266
+ <dynamicField name="*_lsim" type="long" stored="true" indexed="true" multiValued="true"/>
267
+
268
+ <!-- trie long (_lt...) (for faster range queries) -->
269
+ <dynamicField name="*_lti" type="tlong" stored="false" indexed="true" multiValued="false"/>
270
+ <dynamicField name="*_ltim" type="tlong" stored="false" indexed="true" multiValued="true"/>
271
+ <dynamicField name="*_lts" type="tlong" stored="true" indexed="false" multiValued="false"/>
272
+ <dynamicField name="*_ltsm" type="tlong" stored="true" indexed="false" multiValued="true"/>
273
+ <dynamicField name="*_ltsi" type="tlong" stored="true" indexed="true" multiValued="false"/>
274
+ <dynamicField name="*_ltsim" type="tlong" stored="true" indexed="true" multiValued="true"/>
275
+
276
+ <!-- double (_db...) -->
277
+ <dynamicField name="*_dbi" type="double" stored="false" indexed="true" multiValued="false"/>
278
+ <dynamicField name="*_dbim" type="double" stored="false" indexed="true" multiValued="true"/>
279
+ <dynamicField name="*_dbs" type="double" stored="true" indexed="false" multiValued="false"/>
280
+ <dynamicField name="*_dbsm" type="double" stored="true" indexed="false" multiValued="true"/>
281
+ <dynamicField name="*_dbsi" type="double" stored="true" indexed="true" multiValued="false"/>
282
+ <dynamicField name="*_dbsim" type="double" stored="true" indexed="true" multiValued="true"/>
283
+
284
+ <!-- trie double (_dbt...) (for faster range queries) -->
285
+ <dynamicField name="*_dbti" type="tdouble" stored="false" indexed="true" multiValued="false"/>
286
+ <dynamicField name="*_dbtim" type="tdouble" stored="false" indexed="true" multiValued="true"/>
287
+ <dynamicField name="*_dbts" type="tdouble" stored="true" indexed="false" multiValued="false"/>
288
+ <dynamicField name="*_dbtsm" type="tdouble" stored="true" indexed="false" multiValued="true"/>
289
+ <dynamicField name="*_dbtsi" type="tdouble" stored="true" indexed="true" multiValued="false"/>
290
+ <dynamicField name="*_dbtsim" type="tdouble" stored="true" indexed="true" multiValued="true"/>
291
+
292
+ <!-- float (_f...) -->
293
+ <dynamicField name="*_fi" type="float" stored="false" indexed="true" multiValued="false"/>
294
+ <dynamicField name="*_fim" type="float" stored="false" indexed="true" multiValued="true"/>
295
+ <dynamicField name="*_fs" type="float" stored="true" indexed="false" multiValued="false"/>
296
+ <dynamicField name="*_fsm" type="float" stored="true" indexed="false" multiValued="true"/>
297
+ <dynamicField name="*_fsi" type="float" stored="true" indexed="true" multiValued="false"/>
298
+ <dynamicField name="*_fsim" type="float" stored="true" indexed="true" multiValued="true"/>
299
+
300
+ <!-- trie float (_ft...) (for faster range queries) -->
301
+ <dynamicField name="*_fti" type="tfloat" stored="false" indexed="true" multiValued="false"/>
302
+ <dynamicField name="*_ftim" type="tfloat" stored="false" indexed="true" multiValued="true"/>
303
+ <dynamicField name="*_fts" type="tfloat" stored="true" indexed="false" multiValued="false"/>
304
+ <dynamicField name="*_ftsm" type="tfloat" stored="true" indexed="false" multiValued="true"/>
305
+ <dynamicField name="*_ftsi" type="tfloat" stored="true" indexed="true" multiValued="false"/>
306
+ <dynamicField name="*_ftsim" type="tfloat" stored="true" indexed="true" multiValued="true"/>
307
+
308
+ <!-- boolean (_b...) -->
309
+ <dynamicField name="*_bi" type="boolean" stored="false" indexed="true" multiValued="false"/>
310
+ <dynamicField name="*_bs" type="boolean" stored="true" indexed="false" multiValued="false"/>
311
+ <dynamicField name="*_bsi" type="boolean" stored="true" indexed="true" multiValued="false"/>
312
+
313
+ <!-- Type used to index the lat and lon components for the "location" FieldType -->
314
+ <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />
315
+
316
+ <!-- location (_ll...) -->
317
+ <dynamicField name="*_lli" type="location" stored="false" indexed="true" multiValued="false"/>
318
+ <dynamicField name="*_llim" type="location" stored="false" indexed="true" multiValued="true"/>
319
+ <dynamicField name="*_lls" type="location" stored="true" indexed="false" multiValued="false"/>
320
+ <dynamicField name="*_llsm" type="location" stored="true" indexed="false" multiValued="true"/>
321
+ <dynamicField name="*_llsi" type="location" stored="true" indexed="true" multiValued="false"/>
322
+ <dynamicField name="*_llsim" type="location" stored="true" indexed="true" multiValued="true"/>
323
+
324
+ <dynamicField name="*suggest" type="textSuggest" indexed="true" stored="false" multiValued="true" />
325
+
326
+ <!-- you must define copyField source and dest fields explicitly or schemaBrowser doesn't work -->
327
+ <field name="all_text_timv" type="text" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
328
+
329
+
330
+ </fields>
331
+
332
+ <!-- Field to use to determine and enforce document uniqueness.
333
+ Unless this field is marked with required="false", it will be a required field
334
+ -->
335
+ <uniqueKey>id</uniqueKey>
336
+
337
+ <!-- copyField commands copy one field to another at the time a document
338
+ is added to the index. It's used either to index the same field differently,
339
+ or to add multiple fields to the same field for easier/faster searching. -->
340
+ <!-- Copy Fields -->
341
+
342
+ <!-- Above, multiple source fields are copied to the [text] field.
343
+ Another way to map multiple source fields to the same
344
+ destination field is to use the dynamic field syntax.
345
+ copyField also supports a maxChars to copy setting. -->
346
+
347
+ <!-- <copyField source="*_tesim" dest="all_text_timv" maxChars="3000"/> -->
348
+ <!-- for suggestions -->
349
+ <copyField source="*_tesim" dest="suggest"/>
350
+ <copyField source="*_ssim" dest="suggest"/>
351
+
352
+ <!-- Similarity is the scoring routine for each document vs. a query.
353
+ A custom similarity may be specified here, but the default is fine
354
+ for most applications. -->
355
+ <!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
356
+ <!-- ... OR ...
357
+ Specify a SimilarityFactory class name implementation
358
+ allowing parameters to be used.
359
+ -->
360
+ <!--
361
+ <similarity class="com.example.solr.CustomSimilarityFactory">
362
+ <str name="paramkey">param value</str>
363
+ </similarity>
364
+ -->
365
+
366
+ </schema>
@@ -0,0 +1,24 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one or more
2
+ # contributor license agreements. See the NOTICE file distributed with
3
+ # this work for additional information regarding copyright ownership.
4
+ # The ASF licenses this file to You under the Apache License, Version 2.0
5
+ # (the "License"); you may not use this file except in compliance with
6
+ # the License. You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ user=
17
+ solr_hostname=localhost
18
+ solr_port=8983
19
+ rsyncd_port=18983
20
+ data_dir=
21
+ webapp_name=solr
22
+ master_host=
23
+ master_data_dir=
24
+ master_status_dir=