arclight 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (153) hide show
  1. checksums.yaml +4 -4
  2. data/.all-contributorsrc +450 -0
  3. data/.babelrc +3 -0
  4. data/.codeclimate.yml +5 -0
  5. data/.eslintrc +3 -0
  6. data/.rubocop.yml +19 -0
  7. data/.rubocop_todo.yml +15 -135
  8. data/.travis.yml +2 -2
  9. data/CONTRIBUTORS.md +79 -0
  10. data/README.md +21 -24
  11. data/Rakefile +0 -1
  12. data/app/assets/images/blacklight/bookmark.svg +1 -0
  13. data/app/assets/images/blacklight/collection.svg +5 -0
  14. data/app/assets/images/blacklight/compact.svg +1 -25
  15. data/app/assets/images/blacklight/container.svg +5 -0
  16. data/app/assets/images/blacklight/ead.svg +1 -0
  17. data/app/assets/images/blacklight/file.svg +5 -0
  18. data/app/assets/images/blacklight/folder.svg +1 -0
  19. data/app/assets/images/blacklight/list.svg +1 -0
  20. data/app/assets/images/blacklight/minus.svg +1 -0
  21. data/app/assets/images/blacklight/online.svg +5 -0
  22. data/app/assets/images/blacklight/pdf.svg +1 -0
  23. data/app/assets/images/blacklight/plus.svg +1 -0
  24. data/app/assets/images/blacklight/repository.svg +1 -0
  25. data/app/assets/javascripts/arclight/arclight.js +1 -3
  26. data/app/assets/javascripts/arclight/collection_navigation.js +36 -53
  27. data/app/assets/javascripts/arclight/collection_scrollspy.js +1 -1
  28. data/app/assets/javascripts/arclight/context_navigation.js +374 -0
  29. data/app/assets/javascripts/arclight/truncator.js.erb +8 -2
  30. data/app/assets/stylesheets/arclight/application.scss +3 -1
  31. data/app/assets/stylesheets/arclight/bootstrap_overrides.scss +23 -0
  32. data/app/assets/stylesheets/arclight/modules/context_navigation.scss +75 -0
  33. data/app/assets/stylesheets/arclight/modules/hierarchy_and_online_contents.scss +28 -35
  34. data/app/assets/stylesheets/arclight/modules/highlights.scss +2 -1
  35. data/app/assets/stylesheets/arclight/modules/layout.scss +128 -14
  36. data/app/assets/stylesheets/arclight/modules/mastheads.scss +27 -5
  37. data/app/assets/stylesheets/arclight/modules/repositories.scss +1 -5
  38. data/app/assets/stylesheets/arclight/modules/repository_card.scss +6 -7
  39. data/app/assets/stylesheets/arclight/modules/search_results.scss +145 -24
  40. data/app/assets/stylesheets/arclight/modules/show_collection.scss +38 -59
  41. data/app/assets/stylesheets/arclight/responsive.scss +13 -0
  42. data/app/assets/stylesheets/arclight/variables.scss +21 -1
  43. data/app/controllers/concerns/arclight/ead_format_helpers.rb +225 -0
  44. data/app/controllers/concerns/arclight/field_config_helpers.rb +23 -7
  45. data/app/factories/blacklight_field_configuration_factory.rb +1 -0
  46. data/app/helpers/arclight_helper.rb +197 -35
  47. data/app/models/arclight/document_downloads.rb +125 -0
  48. data/app/models/arclight/parent.rb +4 -2
  49. data/app/models/arclight/parents.rb +6 -4
  50. data/app/models/arclight/requests/aeon_external_request.rb +42 -0
  51. data/app/models/arclight/requests/aeon_web_ead.rb +47 -0
  52. data/app/models/arclight/requests/google_form.rb +2 -2
  53. data/app/models/concerns/arclight/catalog.rb +14 -2
  54. data/app/models/concerns/arclight/search_behavior.rb +27 -12
  55. data/app/models/concerns/arclight/solr_document.rb +29 -7
  56. data/app/views/arclight/_requests.html.erb +7 -0
  57. data/app/views/arclight/repositories/_in_person_repository.html.erb +1 -1
  58. data/app/views/arclight/repositories/_repository.html.erb +2 -2
  59. data/app/views/arclight/repositories/_repository_contact.html.erb +9 -0
  60. data/app/views/arclight/repositories/index.html.erb +3 -0
  61. data/app/views/arclight/repositories/show.html.erb +5 -4
  62. data/app/views/arclight/requests/_aeon_external_request_endpoint.html.erb +9 -0
  63. data/app/views/arclight/requests/_aeon_web_ead.html.erb +7 -0
  64. data/app/views/arclight/requests/_google_form.html.erb +2 -1
  65. data/app/views/arclight/viewers/_oembed.html.erb +2 -1
  66. data/app/views/catalog/_access_contents.html.erb +15 -0
  67. data/app/views/catalog/_arclight_abstract_or_scope.html.erb +5 -0
  68. data/app/views/catalog/_arclight_bookmark_control.html.erb +38 -0
  69. data/app/views/catalog/_arclight_document_header_icon.html.erb +1 -0
  70. data/app/views/catalog/_arclight_index_compact_default.html.erb +18 -11
  71. data/app/views/catalog/_arclight_index_default.html.erb +45 -0
  72. data/app/views/catalog/_arclight_index_group_document_compact_default.html.erb +19 -0
  73. data/app/views/catalog/_arclight_index_group_document_default.html.erb +18 -0
  74. data/app/views/catalog/_arclight_online_content_indicator.html.erb +1 -3
  75. data/app/views/catalog/_collection_contents.html.erb +2 -10
  76. data/app/views/catalog/_collection_context.html.erb +15 -0
  77. data/app/views/catalog/_collection_context_nav.html.erb +12 -0
  78. data/app/views/catalog/_collection_online_contents.html.erb +3 -3
  79. data/app/views/catalog/_component_context.html.erb +5 -0
  80. data/app/views/catalog/_containers.html.erb +3 -0
  81. data/app/views/catalog/_context_sidebar.html.erb +2 -2
  82. data/app/views/catalog/_document_downloads.html.erb +14 -0
  83. data/app/views/catalog/_group.html.erb +21 -0
  84. data/app/views/catalog/_group_header_compact_default.html.erb +15 -0
  85. data/app/views/catalog/_group_header_default.html.erb +20 -0
  86. data/app/views/catalog/_group_toggle.html.erb +10 -0
  87. data/app/views/catalog/_home.html.erb +1 -1
  88. data/app/views/catalog/_index_breadcrumb_default.html.erb +5 -2
  89. data/app/views/catalog/_index_collection_context_default.html.erb +53 -0
  90. data/app/views/catalog/_index_header.html.erb +3 -3
  91. data/app/views/catalog/_index_online_contents_default.html.erb +1 -1
  92. data/app/views/catalog/_online_content_label.html.erb +5 -0
  93. data/app/views/catalog/_search_form.html.erb +34 -0
  94. data/app/views/catalog/_search_results.html.erb +1 -4
  95. data/app/views/catalog/_show_actions_box_default.html.erb +27 -0
  96. data/app/views/catalog/_show_breadcrumbs_default.html.erb +5 -20
  97. data/app/views/catalog/_show_collection.html.erb +42 -24
  98. data/app/views/catalog/_show_default.html.erb +63 -35
  99. data/app/views/catalog/_show_upper_metadata_default.html.erb +1 -1
  100. data/app/views/catalog/_sort_and_per_page.html.erb +8 -0
  101. data/app/views/catalog/_within_collection_dropdown.html.erb +26 -0
  102. data/app/views/shared/_breadcrumbs.html.erb +4 -4
  103. data/app/views/shared/_context_sidebar.html.erb +2 -2
  104. data/app/views/shared/_header_navbar.html.erb +13 -17
  105. data/app/views/shared/_show_breadcrumbs.html.erb +27 -0
  106. data/arclight.gemspec +5 -6
  107. data/config/i18n-tasks.yml +2 -1
  108. data/config/locales/arclight.en.yml +54 -21
  109. data/config/repositories.yml +0 -0
  110. data/lib/arclight/engine.rb +22 -12
  111. data/lib/arclight/hash_absolute_xpath.rb +11 -7
  112. data/lib/arclight/level_label.rb +46 -0
  113. data/lib/arclight/normalized_date.rb +2 -2
  114. data/lib/arclight/normalized_id.rb +1 -0
  115. data/lib/arclight/normalized_title.rb +1 -0
  116. data/lib/arclight/repository.rb +58 -5
  117. data/lib/arclight/traject/ead2_config.rb +178 -159
  118. data/lib/arclight/traject/nokogiri_namespaceless_reader.rb +22 -0
  119. data/lib/arclight/version.rb +1 -1
  120. data/lib/arclight/viewers/oembed.rb +1 -0
  121. data/lib/arclight/year_range.rb +9 -1
  122. data/lib/generators/arclight/install_generator.rb +5 -1
  123. data/lib/generators/arclight/templates/catalog_controller.rb +128 -100
  124. data/lib/generators/arclight/templates/config/downloads.yml +12 -0
  125. data/lib/generators/arclight/templates/config/repositories.yml +20 -2
  126. data/lib/generators/arclight/update_generator.rb +1 -1
  127. data/lib/tasks/index.rake +18 -20
  128. data/package.json +8 -1
  129. data/solr/conf/schema.xml +51 -292
  130. data/solr/conf/solrconfig.xml +40 -125
  131. data/tasks/arclight.rake +1 -0
  132. data/vendor/assets/javascripts/responsiveTruncator.js +2 -2
  133. metadata +71 -44
  134. data/app/assets/javascripts/arclight/collection_context.js +0 -18
  135. data/app/assets/javascripts/arclight/component_ancestors.js +0 -56
  136. data/app/assets/javascripts/arclight/search_results.js +0 -15
  137. data/app/assets/stylesheets/arclight/modules/sidebar.scss +0 -21
  138. data/app/views/catalog/_collection_count.html.erb +0 -7
  139. data/app/views/catalog/_collection_downloads.html.erb +0 -15
  140. data/app/views/catalog/_collection_overview.html.erb +0 -7
  141. data/app/views/catalog/_component_contents.html.erb +0 -16
  142. data/app/views/catalog/_component_overview.html.erb +0 -40
  143. data/app/views/catalog/_index_header_hierarchy_default.html.erb +0 -42
  144. data/app/views/catalog/_index_hierarchy_default.html.erb +0 -28
  145. data/app/views/catalog/_results_histogram.html.erb +0 -15
  146. data/app/views/catalog/_show_component_sidebar.html.erb +0 -12
  147. data/app/views/catalog/_show_sidebar.html.erb +0 -22
  148. data/lib/arclight/custom_component.rb +0 -99
  149. data/lib/arclight/custom_document.rb +0 -93
  150. data/lib/arclight/indexer.rb +0 -9
  151. data/lib/arclight/shared_indexing_behavior.rb +0 -97
  152. data/lib/arclight/shared_terminology_behavior.rb +0 -65
  153. data/lib/arclight/solr_ead_indexer_ext.rb +0 -155
@@ -11,8 +11,10 @@ nlm:
11
11
  phone: ''
12
12
  contact_info: 'hmdref@nlm.nih.gov'
13
13
  thumbnail_url: "https://collections.nlm.nih.gov/pageturnerserver/ajaxp?theurl=http://localhost:8080/fedora/get/nlm:nlmuid-101421040-img/THUMB"
14
- google_request_url: 'https://docs.google.com/a/stanford.edu/forms/d/e/1FAIpQLSeOamhY_IcFw4sPnz0ddwWWkrPaHbM5wp7JVbOLOL_mIusEyw/viewform'
15
- google_request_mappings: "document_url=entry.1980510262&collection_name=entry.619150170&collection_creator=entry.14428541&eadid=entry.996397105&containers=entry.1125277048&title=entry.862815208"
14
+ request_types:
15
+ google_form:
16
+ request_url: 'https://docs.google.com/a/stanford.edu/forms/d/e/1FAIpQLSeOamhY_IcFw4sPnz0ddwWWkrPaHbM5wp7JVbOLOL_mIusEyw/viewform'
17
+ request_mappings: "document_url=entry.1980510262&collection_name=entry.619150170&collection_creator=entry.14428541&eadid=entry.996397105&containers=entry.1125277048&title=entry.862815208"
16
18
  sul-spec:
17
19
  name: 'Stanford University Libraries. Special Collections and University Archives'
18
20
  visit_note: 'Special Collections and University Archives materials are stored offsite and must be paged 36 hours in advance.'
@@ -27,6 +29,10 @@ sul-spec:
27
29
  phone: '(650) 725-1022'
28
30
  contact_info: 'specialcollections@stanford.edu'
29
31
  thumbnail_url: 'https://library.stanford.edu/sites/default/files/styles/150x150/public/collection/image/Collections-Super-Enlight.jpg'
32
+ request_types:
33
+ aeon_web_ead:
34
+ request_url: 'https://sample.request.com'
35
+ request_mappings: 'Action=10&Form=31&Value=ead_url'
30
36
  umich-bhl:
31
37
  name: 'University of Michigan. Bentley Historical Library'
32
38
  description: 'The Bentley Historical Library collects the materials for and promotes the study of the histories of two great, intertwined institutions, the State of Michigan and the University of Michigan. The Library is open without fee to the public, and we welcome researchers regardless of academic or professional affiliation.'
@@ -40,3 +46,15 @@ umich-bhl:
40
46
  phone: ''
41
47
  contact_info: 'bentley.ref@umich.edu'
42
48
  thumbnail_url: 'https://upload.wikimedia.org/wikipedia/commons/thumb/9/93/Bhlexterior.jpg/150px-Bhlexterior.jpg'
49
+ request_types:
50
+ aeon_external_request_endpoint:
51
+ request_url: https://example.com/aeon/aeon.dll
52
+ request_mappings:
53
+ url_params:
54
+ Action: 11
55
+ Type: 200
56
+ static:
57
+ SystemId: ArcLight
58
+ ItemInfo1: manuscript
59
+ accessor:
60
+ ItemTitle: collection_name
@@ -9,7 +9,7 @@ module Arclight
9
9
  # downstream application to choose if they want to take our changes or not and
10
10
  # can choose to see a diff of our changes to help them decide.
11
11
  class Update < Arclight::Install
12
- source_root File.expand_path('../templates', __FILE__)
12
+ source_root File.expand_path('templates', __dir__)
13
13
 
14
14
  def create_blacklight_catalog
15
15
  copy_file 'catalog_controller.rb', 'app/controllers/catalog_controller.rb'
data/lib/tasks/index.rake CHANGED
@@ -4,7 +4,7 @@ require 'arclight'
4
4
  require 'benchmark'
5
5
 
6
6
  ##
7
- # Environment variables for indexing:
7
+ # Environment variables and information for indexing:
8
8
  #
9
9
  # REPOSITORY_ID for the repository id/slug to load repository data from
10
10
  # your configuration (default: none).
@@ -12,17 +12,23 @@ require 'benchmark'
12
12
  # REPOSITORY_FILE for the YAML file of your repository configuration
13
13
  # (default: config/repositories.yml).
14
14
  #
15
- # SOLR_URL for the location of your Solr instance
15
+ # Blacklight default connection for the location of your Solr instance, SOLR_URL
16
+ # as a backup
16
17
  # (default: http://127.0.0.1:8983/solr/blacklight-core)
17
18
  #
18
19
  namespace :arclight do
19
20
  desc 'Index an EAD document, use FILE=<path/to/ead.xml> and REPOSITORY_ID=<myid>'
20
21
  task :index do
21
22
  raise 'Please specify your EAD document, ex. FILE=<path/to/ead.xml>' unless ENV['FILE']
23
+
22
24
  print "Loading #{ENV['FILE']} into index...\n"
23
- load_indexer # a leftover construct from solr_ead. Likely will need to be removed/modified when we remove that
24
- elapsed_time = Benchmark.realtime {
25
- `bundle exec traject -u #{ENV['SOLR_URL']} -i xml -c #{Arclight::Engine.root}/lib/arclight/traject/ead2_config.rb #{ENV['FILE']}`
25
+ solr_url = begin
26
+ Blacklight.default_index.connection.base_uri
27
+ rescue StandardError
28
+ ENV['SOLR_URL'] || 'http://127.0.0.1:8983/solr/blacklight-core'
29
+ end
30
+ elapsed_time = Benchmark.realtime {
31
+ `bundle exec traject -u #{solr_url} -i xml -c #{Arclight::Engine.root}/lib/arclight/traject/ead2_config.rb #{ENV['FILE']}`
26
32
  }
27
33
  print "Indexed #{ENV['FILE']} (in #{elapsed_time.round(3)} secs).\n"
28
34
  end
@@ -30,6 +36,7 @@ namespace :arclight do
30
36
  desc 'Index a directory of EADs, use DIR=<path/to/directory> and REPOSITORY_ID=<myid>'
31
37
  task :index_dir do
32
38
  raise 'Please specify your directory, ex. DIR=<path/to/directory>' unless ENV['DIR']
39
+
33
40
  Dir.glob(File.join(ENV['DIR'], '*.xml')).each do |file|
34
41
  system("rake arclight:index FILE=#{file}")
35
42
  end
@@ -38,6 +45,7 @@ namespace :arclight do
38
45
  desc 'Index an EAD document, use URL=<http[s]://domain/path/to/ead.xml> and REPOSITORY_ID=<myid>'
39
46
  task :index_url do
40
47
  raise 'Please specify your EAD document, ex. URL=<http[s]://domain/path/to/ead.xml>' unless ENV['URL']
48
+
41
49
  ead = Nokogiri::XML(open(ENV['URL']))
42
50
  tmp = Tempfile.new(["#{Time.now.to_i}-", '.xml'], encoding: 'utf-8')
43
51
  begin
@@ -55,9 +63,11 @@ namespace :arclight do
55
63
  desc 'Index EADs from a file of URLs, use BATCH=<path/to/urls.txt> and REPOSITORY_ID=<myid>'
56
64
  task :index_url_batch do
57
65
  raise 'Please specify your URLs file, ex. BATCH=<path/to/urls.txt>' unless ENV['BATCH']
66
+
58
67
  File.open(ENV['BATCH']).each_line do |l|
59
68
  ENV['URL'] = l.chomp
60
69
  next if ENV['URL'].empty?
70
+
61
71
  unless ENV['URL'] =~ /\A#{URI.regexp(%w[http https])}\z/
62
72
  puts "Skipping invalid looking url #{ENV['URL']}"
63
73
  next
@@ -69,21 +79,9 @@ namespace :arclight do
69
79
  end
70
80
 
71
81
  desc 'Destroy all documents in the index'
72
- task :destroy_index_docs do
82
+ task destroy_index_docs: :environment do
73
83
  puts 'Deleting all documents from index...'
74
- indexer = load_indexer
75
- indexer.delete_all
84
+ Blacklight.default_index.connection.delete_by_query('*:*')
85
+ Blacklight.default_index.connection.commit
76
86
  end
77
87
  end
78
-
79
- def load_indexer
80
- # hardcoded since we don't have access to Blacklight.connection_config[:url] here
81
- ENV['SOLR_URL'] ||= 'http://127.0.0.1:8983/solr/blacklight-core'
82
-
83
- options = {
84
- document: Arclight::CustomDocument,
85
- component: Arclight::CustomComponent
86
- }
87
-
88
- Arclight::Indexer.new(options)
89
- end
data/package.json CHANGED
@@ -2,13 +2,20 @@
2
2
  "name": "arclight",
3
3
  "description": "",
4
4
  "main": "index.js",
5
- "dependencies": {},
5
+ "dependencies": {
6
+ "@babel/core": "^7.5.5",
7
+ "@babel/plugin-external-helpers": "^7.2.0",
8
+ "@babel/plugin-transform-modules-umd": "^7.2.0",
9
+ "@babel/preset-env": "^7.5.5"
10
+ },
6
11
  "devDependencies": {
7
12
  "eslint": "^3.19.0",
8
13
  "eslint-config-airbnb-base": "^11.1.3",
9
14
  "eslint-plugin-import": "^2.2.0"
10
15
  },
11
16
  "scripts": {
17
+ "lint": "eslint './app/assets/javascripts/**/*.{js,es6}'",
18
+ "lint:fix": "eslint --fix './app/assets/javascripts/**/*.{js,es6}'",
12
19
  "test": "echo \"Error: no test specified\" && exit 1"
13
20
  },
14
21
  "repository": {
data/solr/conf/schema.xml CHANGED
@@ -67,7 +67,7 @@
67
67
  -->
68
68
 
69
69
  <!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
70
- <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
70
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true" docValues="true"/>
71
71
 
72
72
  <!-- boolean type: "true" or "false" -->
73
73
  <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
@@ -89,29 +89,14 @@
89
89
  then default lucene sorting will be used which places docs without the
90
90
  field first in an ascending sort and last in a descending sort.
91
91
  -->
92
-
93
- <!--
94
- Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
95
- -->
96
- <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
97
- <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
98
- <fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
99
- <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
100
-
101
92
  <!--
102
- Numeric field types that index each value at various levels of precision
103
- to accelerate range queries when the number of values between the range
104
- endpoints is large. See the javadoc for NumericRangeQuery for internal
105
- implementation details.
106
-
107
- Smaller precisionStep values (specified in bits) will lead to more tokens
108
- indexed per value, slightly larger index size, and faster range queries.
109
- A precisionStep of 0 disables indexing at different precision levels.
93
+ Numeric field types that index values using KD-trees.
94
+ Point fields don't support FieldCache, so they must have docValues="true" if needed for sorting, faceting, functions, etc.
110
95
  -->
111
- <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
112
- <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
113
- <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
114
- <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
96
+ <fieldType name="pint" class="solr.IntPointField" docValues="true"/>
97
+ <fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/>
98
+ <fieldType name="plong" class="solr.LongPointField" docValues="true"/>
99
+ <fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/>
115
100
 
116
101
  <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
117
102
  is a more restricted form of the canonical representation of dateTime
@@ -135,10 +120,8 @@
135
120
 
136
121
  Note: For faster range queries, consider the tdate type
137
122
  -->
138
- <fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
139
-
140
- <!-- A Trie based date field for faster date range queries and date faceting. -->
141
- <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>
123
+ <!-- KD-tree versions of date fields -->
124
+ <fieldType name="pdate" class="solr.DatePointField" docValues="true"/>
142
125
 
143
126
  <!-- The "RandomSortField" is not used to store or search any
144
127
  data. You can declare fields of this type it in your schema
@@ -153,154 +136,61 @@
153
136
  -->
154
137
  <fieldType name="random" class="solr.RandomSortField" indexed="true" />
155
138
 
156
- <!-- solr.TextField allows the specification of custom text analyzers
157
- specified as a tokenizer and a list of token filters. Different
158
- analyzers may be specified for indexing and querying.
159
-
160
- The optional positionIncrementGap puts space between multiple fields of
161
- this type on the same document, with the purpose of preventing false phrase
162
- matching across fields.
163
-
164
- For more info on customizing your analyzer chain, please see
165
- http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
166
- -->
167
- <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
168
- <analyzer>
169
- <tokenizer class="solr.StandardTokenizerFactory"/>
170
- <filter class="solr.ICUFoldingFilterFactory" />
171
- <filter class="solr.SnowballPorterFilterFactory" language="English" />
172
- </analyzer>
173
- </fieldType>
174
-
175
- <!-- One can also specify an existing Analyzer class that has a
176
- default constructor via the class attribute on the analyzer element
177
- <fieldType name="text_greek" class="solr.TextField">
178
- <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
179
- </fieldType>
180
- -->
181
-
182
- <!-- A text field that only splits on whitespace for exact matching of words -->
183
- <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
139
+ <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
184
140
  <analyzer>
185
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
186
- </analyzer>
187
- </fieldType>
188
-
189
- <!-- A general text field that has reasonable, generic
190
- cross-language defaults: it tokenizes with StandardTokenizer,
191
- removes stop words from case-insensitive "stopwords.txt"
192
- (empty by default), and down cases. At query time only, it
193
- also applies synonyms. -->
194
- <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
195
- <analyzer type="index">
196
- <tokenizer class="solr.StandardTokenizerFactory"/>
197
- <!-- in this example, we will only use synonyms at query time
198
- <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
199
- -->
200
- <filter class="solr.LowerCaseFilterFactory"/>
201
- </analyzer>
202
- <analyzer type="query">
203
- <tokenizer class="solr.StandardTokenizerFactory"/>
204
- <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
205
- <filter class="solr.LowerCaseFilterFactory"/>
141
+ <tokenizer class="solr.ICUTokenizerFactory" />
142
+ <filter class="solr.KeywordRepeatFilterFactory" />
143
+ <filter class="solr.ICUFoldingFilterFactory" />
144
+ <filter class="solr.PorterStemFilterFactory"/>
145
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory" />
206
146
  </analyzer>
207
147
  </fieldType>
208
148
 
209
- <!-- A text field with defaults appropriate for English: it
210
- tokenizes with StandardTokenizer, removes English stop words
211
- (stopwords_en.txt), down cases, protects words from protwords.txt, and
212
- finally applies Porter's stemming. The query time analyzer
213
- also applies synonyms from synonyms.txt. -->
214
149
  <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
215
150
  <analyzer type="index">
216
- <tokenizer class="solr.StandardTokenizerFactory"/>
217
- <!-- in this example, we will only use synonyms at query time
218
- <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
219
- -->
220
- <filter class="solr.LowerCaseFilterFactory"/>
221
- <filter class="solr.EnglishPossessiveFilterFactory"/>
151
+ <tokenizer class="solr.WhitespaceTokenizerFactory" />
152
+ <filter class="solr.KeywordRepeatFilterFactory" />
153
+ <filter class="solr.WordDelimiterGraphFilterFactory"/>
154
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
155
+ <filter class="solr.ICUFoldingFilterFactory" />
222
156
  <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
223
- <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
224
- <filter class="solr.EnglishMinimalStemFilterFactory"/>
225
- -->
226
157
  <filter class="solr.PorterStemFilterFactory"/>
158
+ <filter class="solr.FlattenGraphFilterFactory"/> <!-- required on index analyzers after graph filters -->
159
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory" />
227
160
  </analyzer>
228
161
  <analyzer type="query">
229
- <tokenizer class="solr.StandardTokenizerFactory"/>
162
+ <tokenizer class="solr.WhitespaceTokenizerFactory" />
163
+ <filter class="solr.KeywordRepeatFilterFactory" />
164
+ <filter class="solr.WordDelimiterGraphFilterFactory"/>
230
165
  <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
231
- <filter class="solr.LowerCaseFilterFactory"/>
232
- <filter class="solr.EnglishPossessiveFilterFactory"/>
166
+ <filter class="solr.ICUFoldingFilterFactory" />
167
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
233
168
  <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
234
- <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
235
- <filter class="solr.EnglishMinimalStemFilterFactory"/>
236
- -->
237
169
  <filter class="solr.PorterStemFilterFactory"/>
170
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory" />
238
171
  </analyzer>
239
172
  </fieldType>
240
173
 
241
- <!-- A text field with defaults appropriate for English, plus
242
- aggressive word-splitting and autophrase features enabled.
243
- This field is just like text_en, except it adds
244
- WordDelimiterFilter to enable splitting and matching of
245
- words on case-change, alpha numeric boundaries, and
246
- non-alphanumeric chars. This means certain compound word
247
- cases will work, for example query "wi fi" will match
248
- document "WiFi" or "wi-fi". However, other cases will still
249
- not match, for example if the query is "wifi" and the
250
- document is "wi fi" or if the query is "wi-fi" and the
251
- document is "wifi".
252
- -->
253
- <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
174
+ <fieldType name="identifier_match" class="solr.TextField" positionIncrementGap="100">
254
175
  <analyzer type="index">
255
176
  <tokenizer class="solr.WhitespaceTokenizerFactory"/>
256
- <!-- in this example, we will only use synonyms at query time
257
- <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
258
- -->
259
- <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
260
- <filter class="solr.LowerCaseFilterFactory"/>
261
- <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
262
- <filter class="solr.PorterStemFilterFactory"/>
177
+ <filter class="solr.WordDelimiterGraphFilterFactory"
178
+ catenateWords="1"
179
+ catenateNumbers="1"
180
+ catenateAll="1"
181
+ />
182
+ <filter class="solr.ICUFoldingFilterFactory" />
183
+ <filter class="solr.FlattenGraphFilterFactory"/> <!-- required on index analyzers after graph filters -->
263
184
  </analyzer>
264
185
  <analyzer type="query">
265
186
  <tokenizer class="solr.WhitespaceTokenizerFactory"/>
266
- <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
267
- <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
268
- <filter class="solr.LowerCaseFilterFactory"/>
269
- <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
270
- <filter class="solr.PorterStemFilterFactory"/>
271
- </analyzer>
272
- </fieldType>
273
-
274
- <!-- Less flexible matching, but less false matches. Probably not ideal for product names,
275
- but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
276
- <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
277
- <analyzer>
278
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
279
- <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
280
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
281
- <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
282
- <filter class="solr.LowerCaseFilterFactory"/>
283
- <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
284
- <filter class="solr.EnglishMinimalStemFilterFactory"/>
285
- <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
286
- possible with WordDelimiterFilter in conjuncton with stemming. -->
287
- <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
288
- </analyzer>
289
- </fieldType>
290
-
291
- <!-- Just like text_general except it reverses the characters of
292
- each token, to enable more efficient leading wildcard queries. -->
293
- <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
294
- <analyzer type="index">
295
- <tokenizer class="solr.StandardTokenizerFactory"/>
296
- <filter class="solr.LowerCaseFilterFactory"/>
297
- <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
298
- maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
299
- </analyzer>
300
- <analyzer type="query">
301
- <tokenizer class="solr.StandardTokenizerFactory"/>
302
- <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
303
- <filter class="solr.LowerCaseFilterFactory"/>
187
+ <filter class="solr.WordDelimiterGraphFilterFactory"
188
+ catenateWords="1"
189
+ catenateNumbers="1"
190
+ catenateAll="1"
191
+ />
192
+ <filter class="solr.ICUFoldingFilterFactory" />
193
+ <filter class="solr.WordDelimiterGraphFilterFactory"/>
304
194
  </analyzer>
305
195
  </fieldType>
306
196
 
@@ -313,7 +203,7 @@
313
203
  </analyzer>
314
204
  </fieldType>
315
205
 
316
- <fieldType class="solr.TextField" name="textSuggest" positionIncrementGap="100">
206
+ <fieldType name="textSuggest" class="solr.TextField" positionIncrementGap="100">
317
207
  <analyzer>
318
208
  <tokenizer class="solr.KeywordTokenizerFactory"/>
319
209
  <filter class="solr.LowerCaseFilterFactory"/>
@@ -321,16 +211,6 @@
321
211
  </analyzer>
322
212
  </fieldType>
323
213
 
324
- <!-- charFilter + WhitespaceTokenizer -->
325
- <!--
326
- <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" >
327
- <analyzer>
328
- <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
329
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
330
- </analyzer>
331
- </fieldType>
332
- -->
333
-
334
214
  <!-- This is an example of using the KeywordTokenizer along
335
215
  With various TokenFilterFactories to produce a sortable field
336
216
  that does not include some properties of the source text
@@ -364,71 +244,9 @@
364
244
  </analyzer>
365
245
  </fieldType>
366
246
 
367
- <fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
368
- <analyzer>
369
- <tokenizer class="solr.StandardTokenizerFactory"/>
370
- <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
371
- </analyzer>
372
- </fieldtype>
373
-
374
- <fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" >
375
- <analyzer>
376
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
377
- <!--
378
- The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
379
- a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
380
- Attributes of the DelimitedPayloadTokenFilterFactory :
381
- "delimiter" - a one character delimiter. Default is | (pipe)
382
- "encoder" - how to encode the following value into a playload
383
- float -> org.apache.lucene.analysis.payloads.FloatEncoder,
384
- integer -> o.a.l.a.p.IntegerEncoder
385
- identity -> o.a.l.a.p.IdentityEncoder
386
- Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
387
- -->
388
- <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
389
- </analyzer>
390
- </fieldtype>
391
-
392
- <!-- lowercases the entire field value, keeping it as a single token. -->
393
- <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
394
- <analyzer>
395
- <tokenizer class="solr.KeywordTokenizerFactory"/>
396
- <filter class="solr.LowerCaseFilterFactory" />
397
- </analyzer>
398
- </fieldType>
399
-
400
- <fieldType name="text_path" class="solr.TextField" positionIncrementGap="100">
401
- <analyzer>
402
- <tokenizer class="solr.PathHierarchyTokenizerFactory"/>
403
- </analyzer>
404
- </fieldType>
405
-
406
- <!-- since fields of this type are by default not stored or indexed,
407
- any data added to them will be ignored outright. -->
408
- <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
409
-
410
- <!-- This point type indexes the coordinates as separate fields (subFields)
411
- If subFieldType is defined, it references a type, and a dynamic field
412
- definition is created matching *___<typename>. Alternately, if
413
- subFieldSuffix is defined, that is used to create the subFields.
414
- Example: if subFieldType="double", then the coordinates would be
415
- indexed in fields myloc_0___double,myloc_1___double.
416
- Example: if subFieldSuffix="_d" then the coordinates would be indexed
417
- in fields myloc_0_d,myloc_1_d
418
- The subFields are an implementation detail of the fieldType, and end
419
- users normally should not need to know about them.
420
- -->
421
- <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
422
-
423
247
  <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
424
248
  <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
425
249
 
426
- <!--
427
- A Geohash is a compact representation of a latitude longitude pair in a single field.
428
- See http://wiki.apache.org/solr/SpatialSearch
429
- -->
430
- <fieldtype name="geohash" class="solr.GeoHashField"/>
431
-
432
250
  <fieldType name="_nest_path_" class="solr.NestPathField" />
433
251
 
434
252
  </types>
@@ -460,11 +278,12 @@
460
278
 
461
279
  <!-- NOTE: this is not a full list of fields in the index; dynamic fields are also used -->
462
280
  <field name="id" type="string" indexed="true" stored="true" required="true" />
463
- <field name="_version_" type="long" indexed="true" stored="true" multiValued="false" />
464
- <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
281
+ <field name="_version_" type="plong" indexed="true" stored="true" multiValued="false" />
282
+ <field name="timestamp" type="pdate" indexed="true" stored="true" default="NOW" multiValued="false"/>
465
283
  <!-- default, catch all search field -->
466
284
  <field name="text" type="text" indexed="true" stored="true" multiValued="true"/>
467
-
285
+ <field name="unitid_identifier_match" type="identifier_match" indexed="true" multiValued="true" />
286
+
468
287
  <field name="_root_" type="string" indexed="true" stored="true" docValues="false" />
469
288
  <field name="_nest_parent_" type="string" indexed="true" stored="true"/>
470
289
  <field name="_nest_path_" type="_nest_path_" indexed="true" stored="true"/>
@@ -476,37 +295,9 @@
476
295
  EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
477
296
  Longer patterns will be matched first. if equal size patterns
478
297
  both match, the first appearing in the schema will be used. -->
479
- <dynamicField name="*_i" type="int" indexed="true" stored="true"/>
480
- <dynamicField name="*_s" type="string" indexed="true" stored="true"/>
481
- <dynamicField name="*_l" type="long" indexed="true" stored="true"/>
482
- <dynamicField name="*_t" type="text" indexed="true" stored="true" multiValued="true"/>
483
- <dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/>
484
- <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
485
- <dynamicField name="*_f" type="float" indexed="true" stored="true"/>
486
- <dynamicField name="*_d" type="double" indexed="true" stored="true"/>
487
-
488
- <!-- Type used to index the lat and lon components for the "location" FieldType -->
489
- <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/>
490
-
491
- <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
492
- <dynamicField name="*_p" type="location" indexed="true" stored="true"/>
493
-
494
- <!-- some trie-coded dynamic fields for faster range queries -->
495
- <dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
496
- <dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
497
- <dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
498
- <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
499
- <dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
500
-
501
- <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
502
- <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
503
-
504
298
  <dynamicField name="random_*" type="random" />
505
299
 
506
- <dynamicField name="*_display" type="string" indexed="false" stored="true" multiValued="true" />
507
- <dynamicField name="*_facet" type="string" indexed="true" stored="false" multiValued="true" />
508
300
  <dynamicField name="*_sort" type="alphaNumericSort" indexed="true" stored="false" multiValued="false" />
509
- <dynamicField name="*_unstem_search" type="text_general" indexed="true" stored="false" multiValued="true" />
510
301
  <dynamicField name="*spell" type="textSpell" indexed="true" stored="false" multiValued="true" />
511
302
  <dynamicField name="*suggest" type="textSuggest" indexed="true" stored="false" multiValued="true" />
512
303
 
@@ -523,12 +314,8 @@
523
314
  <dynamicField name="*_ssm" type="string" stored="true" indexed="false" multiValued="true" />
524
315
  <dynamicField name="*_ssi" type="string" stored="true" indexed="true" multiValued="false" />
525
316
  <dynamicField name="*_ssim" type="string" stored="true" indexed="true" multiValued="true" />
526
- <dynamicField name="*_dtsi" type="date" stored="true" indexed="true" multiValued="false" />
527
- <dynamicField name="*_dtsim" type="date" stored="true" indexed="true" multiValued="true" />
528
- <dynamicField name="*_bsi" type="boolean" stored="true" indexed="true" multiValued="false" />
529
- <dynamicField name="*_isim" type="int" stored="true" indexed="true" multiValued="true" />
530
- <dynamicField name="*_ii" type="int" stored="false" indexed="true" multiValued="false" />
531
-
317
+ <dynamicField name="*_isim" type="pint" stored="true" indexed="true" multiValued="true" />
318
+ <dynamicField name="*_ii" type="pint" stored="false" indexed="true" multiValued="false" />
532
319
  </fields>
533
320
 
534
321
  <!-- Field to use to determine and enforce document uniqueness.
@@ -581,9 +368,7 @@
581
368
  <copyField source="userestrict_teim" dest="text" />
582
369
  <!-- grab structured data that's important -->
583
370
  <copyField source="unitid_ssm" dest="text" />
584
-
585
- <!-- unstemmed fields -->
586
- <!-- <copyField source="title_t" dest="title_unstem_search"/> -->
371
+ <copyField source="unitid_ssm" dest="unitid_identifier_match" />
587
372
 
588
373
  <!-- sort fields -->
589
374
  <copyField source="normalized_title_ssm" dest="title_sort"/> <!-- TODO: assumes single values -->
@@ -602,30 +387,4 @@
602
387
  <copyField source="places_ssim" dest="suggest"/>
603
388
  <copyField source="access_subjects_ssim" dest="suggest"/>
604
389
 
605
- <!-- Above, multiple source fields are copied to the [text] field.
606
- Another way to map multiple source fields to the same
607
- destination field is to use the dynamic field syntax.
608
- copyField also supports a maxChars to copy setting. -->
609
-
610
- <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
611
-
612
- <!-- copy name to alphaNameSort, a field designed for sorting by name -->
613
- <!-- <copyField source="name" dest="alphaNameSort"/> -->
614
-
615
-
616
- <!-- Similarity is the scoring routine for each document vs. a query.
617
- A custom similarity may be specified here, but the default is fine
618
- for most applications. -->
619
- <!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
620
- <!-- ... OR ...
621
- Specify a SimilarityFactory class name implementation
622
- allowing parameters to be used.
623
- -->
624
- <!--
625
- <similarity class="com.example.solr.CustomSimilarityFactory">
626
- <str name="paramkey">param value</str>
627
- </similarity>
628
- -->
629
-
630
-
631
390
  </schema>