arclight 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.all-contributorsrc +450 -0
- data/.babelrc +3 -0
- data/.codeclimate.yml +5 -0
- data/.eslintrc +3 -0
- data/.rubocop.yml +19 -0
- data/.rubocop_todo.yml +15 -135
- data/.travis.yml +2 -2
- data/CONTRIBUTORS.md +79 -0
- data/README.md +21 -24
- data/Rakefile +0 -1
- data/app/assets/images/blacklight/bookmark.svg +1 -0
- data/app/assets/images/blacklight/collection.svg +5 -0
- data/app/assets/images/blacklight/compact.svg +1 -25
- data/app/assets/images/blacklight/container.svg +5 -0
- data/app/assets/images/blacklight/ead.svg +1 -0
- data/app/assets/images/blacklight/file.svg +5 -0
- data/app/assets/images/blacklight/folder.svg +1 -0
- data/app/assets/images/blacklight/list.svg +1 -0
- data/app/assets/images/blacklight/minus.svg +1 -0
- data/app/assets/images/blacklight/online.svg +5 -0
- data/app/assets/images/blacklight/pdf.svg +1 -0
- data/app/assets/images/blacklight/plus.svg +1 -0
- data/app/assets/images/blacklight/repository.svg +1 -0
- data/app/assets/javascripts/arclight/arclight.js +1 -3
- data/app/assets/javascripts/arclight/collection_navigation.js +36 -53
- data/app/assets/javascripts/arclight/collection_scrollspy.js +1 -1
- data/app/assets/javascripts/arclight/context_navigation.js +374 -0
- data/app/assets/javascripts/arclight/truncator.js.erb +8 -2
- data/app/assets/stylesheets/arclight/application.scss +3 -1
- data/app/assets/stylesheets/arclight/bootstrap_overrides.scss +23 -0
- data/app/assets/stylesheets/arclight/modules/context_navigation.scss +75 -0
- data/app/assets/stylesheets/arclight/modules/hierarchy_and_online_contents.scss +28 -35
- data/app/assets/stylesheets/arclight/modules/highlights.scss +2 -1
- data/app/assets/stylesheets/arclight/modules/layout.scss +128 -14
- data/app/assets/stylesheets/arclight/modules/mastheads.scss +27 -5
- data/app/assets/stylesheets/arclight/modules/repositories.scss +1 -5
- data/app/assets/stylesheets/arclight/modules/repository_card.scss +6 -7
- data/app/assets/stylesheets/arclight/modules/search_results.scss +145 -24
- data/app/assets/stylesheets/arclight/modules/show_collection.scss +38 -59
- data/app/assets/stylesheets/arclight/responsive.scss +13 -0
- data/app/assets/stylesheets/arclight/variables.scss +21 -1
- data/app/controllers/concerns/arclight/ead_format_helpers.rb +225 -0
- data/app/controllers/concerns/arclight/field_config_helpers.rb +23 -7
- data/app/factories/blacklight_field_configuration_factory.rb +1 -0
- data/app/helpers/arclight_helper.rb +197 -35
- data/app/models/arclight/document_downloads.rb +125 -0
- data/app/models/arclight/parent.rb +4 -2
- data/app/models/arclight/parents.rb +6 -4
- data/app/models/arclight/requests/aeon_external_request.rb +42 -0
- data/app/models/arclight/requests/aeon_web_ead.rb +47 -0
- data/app/models/arclight/requests/google_form.rb +2 -2
- data/app/models/concerns/arclight/catalog.rb +14 -2
- data/app/models/concerns/arclight/search_behavior.rb +27 -12
- data/app/models/concerns/arclight/solr_document.rb +29 -7
- data/app/views/arclight/_requests.html.erb +7 -0
- data/app/views/arclight/repositories/_in_person_repository.html.erb +1 -1
- data/app/views/arclight/repositories/_repository.html.erb +2 -2
- data/app/views/arclight/repositories/_repository_contact.html.erb +9 -0
- data/app/views/arclight/repositories/index.html.erb +3 -0
- data/app/views/arclight/repositories/show.html.erb +5 -4
- data/app/views/arclight/requests/_aeon_external_request_endpoint.html.erb +9 -0
- data/app/views/arclight/requests/_aeon_web_ead.html.erb +7 -0
- data/app/views/arclight/requests/_google_form.html.erb +2 -1
- data/app/views/arclight/viewers/_oembed.html.erb +2 -1
- data/app/views/catalog/_access_contents.html.erb +15 -0
- data/app/views/catalog/_arclight_abstract_or_scope.html.erb +5 -0
- data/app/views/catalog/_arclight_bookmark_control.html.erb +38 -0
- data/app/views/catalog/_arclight_document_header_icon.html.erb +1 -0
- data/app/views/catalog/_arclight_index_compact_default.html.erb +18 -11
- data/app/views/catalog/_arclight_index_default.html.erb +45 -0
- data/app/views/catalog/_arclight_index_group_document_compact_default.html.erb +19 -0
- data/app/views/catalog/_arclight_index_group_document_default.html.erb +18 -0
- data/app/views/catalog/_arclight_online_content_indicator.html.erb +1 -3
- data/app/views/catalog/_collection_contents.html.erb +2 -10
- data/app/views/catalog/_collection_context.html.erb +15 -0
- data/app/views/catalog/_collection_context_nav.html.erb +12 -0
- data/app/views/catalog/_collection_online_contents.html.erb +3 -3
- data/app/views/catalog/_component_context.html.erb +5 -0
- data/app/views/catalog/_containers.html.erb +3 -0
- data/app/views/catalog/_context_sidebar.html.erb +2 -2
- data/app/views/catalog/_document_downloads.html.erb +14 -0
- data/app/views/catalog/_group.html.erb +21 -0
- data/app/views/catalog/_group_header_compact_default.html.erb +15 -0
- data/app/views/catalog/_group_header_default.html.erb +20 -0
- data/app/views/catalog/_group_toggle.html.erb +10 -0
- data/app/views/catalog/_home.html.erb +1 -1
- data/app/views/catalog/_index_breadcrumb_default.html.erb +5 -2
- data/app/views/catalog/_index_collection_context_default.html.erb +53 -0
- data/app/views/catalog/_index_header.html.erb +3 -3
- data/app/views/catalog/_index_online_contents_default.html.erb +1 -1
- data/app/views/catalog/_online_content_label.html.erb +5 -0
- data/app/views/catalog/_search_form.html.erb +34 -0
- data/app/views/catalog/_search_results.html.erb +1 -4
- data/app/views/catalog/_show_actions_box_default.html.erb +27 -0
- data/app/views/catalog/_show_breadcrumbs_default.html.erb +5 -20
- data/app/views/catalog/_show_collection.html.erb +42 -24
- data/app/views/catalog/_show_default.html.erb +63 -35
- data/app/views/catalog/_show_upper_metadata_default.html.erb +1 -1
- data/app/views/catalog/_sort_and_per_page.html.erb +8 -0
- data/app/views/catalog/_within_collection_dropdown.html.erb +26 -0
- data/app/views/shared/_breadcrumbs.html.erb +4 -4
- data/app/views/shared/_context_sidebar.html.erb +2 -2
- data/app/views/shared/_header_navbar.html.erb +13 -17
- data/app/views/shared/_show_breadcrumbs.html.erb +27 -0
- data/arclight.gemspec +5 -6
- data/config/i18n-tasks.yml +2 -1
- data/config/locales/arclight.en.yml +54 -21
- data/config/repositories.yml +0 -0
- data/lib/arclight/engine.rb +22 -12
- data/lib/arclight/hash_absolute_xpath.rb +11 -7
- data/lib/arclight/level_label.rb +46 -0
- data/lib/arclight/normalized_date.rb +2 -2
- data/lib/arclight/normalized_id.rb +1 -0
- data/lib/arclight/normalized_title.rb +1 -0
- data/lib/arclight/repository.rb +58 -5
- data/lib/arclight/traject/ead2_config.rb +178 -159
- data/lib/arclight/traject/nokogiri_namespaceless_reader.rb +22 -0
- data/lib/arclight/version.rb +1 -1
- data/lib/arclight/viewers/oembed.rb +1 -0
- data/lib/arclight/year_range.rb +9 -1
- data/lib/generators/arclight/install_generator.rb +5 -1
- data/lib/generators/arclight/templates/catalog_controller.rb +128 -100
- data/lib/generators/arclight/templates/config/downloads.yml +12 -0
- data/lib/generators/arclight/templates/config/repositories.yml +20 -2
- data/lib/generators/arclight/update_generator.rb +1 -1
- data/lib/tasks/index.rake +18 -20
- data/package.json +8 -1
- data/solr/conf/schema.xml +51 -292
- data/solr/conf/solrconfig.xml +40 -125
- data/tasks/arclight.rake +1 -0
- data/vendor/assets/javascripts/responsiveTruncator.js +2 -2
- metadata +71 -44
- data/app/assets/javascripts/arclight/collection_context.js +0 -18
- data/app/assets/javascripts/arclight/component_ancestors.js +0 -56
- data/app/assets/javascripts/arclight/search_results.js +0 -15
- data/app/assets/stylesheets/arclight/modules/sidebar.scss +0 -21
- data/app/views/catalog/_collection_count.html.erb +0 -7
- data/app/views/catalog/_collection_downloads.html.erb +0 -15
- data/app/views/catalog/_collection_overview.html.erb +0 -7
- data/app/views/catalog/_component_contents.html.erb +0 -16
- data/app/views/catalog/_component_overview.html.erb +0 -40
- data/app/views/catalog/_index_header_hierarchy_default.html.erb +0 -42
- data/app/views/catalog/_index_hierarchy_default.html.erb +0 -28
- data/app/views/catalog/_results_histogram.html.erb +0 -15
- data/app/views/catalog/_show_component_sidebar.html.erb +0 -12
- data/app/views/catalog/_show_sidebar.html.erb +0 -22
- data/lib/arclight/custom_component.rb +0 -99
- data/lib/arclight/custom_document.rb +0 -93
- data/lib/arclight/indexer.rb +0 -9
- data/lib/arclight/shared_indexing_behavior.rb +0 -97
- data/lib/arclight/shared_terminology_behavior.rb +0 -65
- data/lib/arclight/solr_ead_indexer_ext.rb +0 -155
@@ -11,8 +11,10 @@ nlm:
|
|
11
11
|
phone: ''
|
12
12
|
contact_info: 'hmdref@nlm.nih.gov'
|
13
13
|
thumbnail_url: "https://collections.nlm.nih.gov/pageturnerserver/ajaxp?theurl=http://localhost:8080/fedora/get/nlm:nlmuid-101421040-img/THUMB"
|
14
|
-
|
15
|
-
|
14
|
+
request_types:
|
15
|
+
google_form:
|
16
|
+
request_url: 'https://docs.google.com/a/stanford.edu/forms/d/e/1FAIpQLSeOamhY_IcFw4sPnz0ddwWWkrPaHbM5wp7JVbOLOL_mIusEyw/viewform'
|
17
|
+
request_mappings: "document_url=entry.1980510262&collection_name=entry.619150170&collection_creator=entry.14428541&eadid=entry.996397105&containers=entry.1125277048&title=entry.862815208"
|
16
18
|
sul-spec:
|
17
19
|
name: 'Stanford University Libraries. Special Collections and University Archives'
|
18
20
|
visit_note: 'Special Collections and University Archives materials are stored offsite and must be paged 36 hours in advance.'
|
@@ -27,6 +29,10 @@ sul-spec:
|
|
27
29
|
phone: '(650) 725-1022'
|
28
30
|
contact_info: 'specialcollections@stanford.edu'
|
29
31
|
thumbnail_url: 'https://library.stanford.edu/sites/default/files/styles/150x150/public/collection/image/Collections-Super-Enlight.jpg'
|
32
|
+
request_types:
|
33
|
+
aeon_web_ead:
|
34
|
+
request_url: 'https://sample.request.com'
|
35
|
+
request_mappings: 'Action=10&Form=31&Value=ead_url'
|
30
36
|
umich-bhl:
|
31
37
|
name: 'University of Michigan. Bentley Historical Library'
|
32
38
|
description: 'The Bentley Historical Library collects the materials for and promotes the study of the histories of two great, intertwined institutions, the State of Michigan and the University of Michigan. The Library is open without fee to the public, and we welcome researchers regardless of academic or professional affiliation.'
|
@@ -40,3 +46,15 @@ umich-bhl:
|
|
40
46
|
phone: ''
|
41
47
|
contact_info: 'bentley.ref@umich.edu'
|
42
48
|
thumbnail_url: 'https://upload.wikimedia.org/wikipedia/commons/thumb/9/93/Bhlexterior.jpg/150px-Bhlexterior.jpg'
|
49
|
+
request_types:
|
50
|
+
aeon_external_request_endpoint:
|
51
|
+
request_url: https://example.com/aeon/aeon.dll
|
52
|
+
request_mappings:
|
53
|
+
url_params:
|
54
|
+
Action: 11
|
55
|
+
Type: 200
|
56
|
+
static:
|
57
|
+
SystemId: ArcLight
|
58
|
+
ItemInfo1: manuscript
|
59
|
+
accessor:
|
60
|
+
ItemTitle: collection_name
|
@@ -9,7 +9,7 @@ module Arclight
|
|
9
9
|
# downstream application to choose if they want to take our changes or not and
|
10
10
|
# can choose to see a diff of our changes to help them decide.
|
11
11
|
class Update < Arclight::Install
|
12
|
-
source_root File.expand_path('
|
12
|
+
source_root File.expand_path('templates', __dir__)
|
13
13
|
|
14
14
|
def create_blacklight_catalog
|
15
15
|
copy_file 'catalog_controller.rb', 'app/controllers/catalog_controller.rb'
|
data/lib/tasks/index.rake
CHANGED
@@ -4,7 +4,7 @@ require 'arclight'
|
|
4
4
|
require 'benchmark'
|
5
5
|
|
6
6
|
##
|
7
|
-
# Environment variables for indexing:
|
7
|
+
# Environment variables and information for indexing:
|
8
8
|
#
|
9
9
|
# REPOSITORY_ID for the repository id/slug to load repository data from
|
10
10
|
# your configuration (default: none).
|
@@ -12,17 +12,23 @@ require 'benchmark'
|
|
12
12
|
# REPOSITORY_FILE for the YAML file of your repository configuration
|
13
13
|
# (default: config/repositories.yml).
|
14
14
|
#
|
15
|
-
#
|
15
|
+
# Blacklight default connection for the location of your Solr instance, SOLR_URL
|
16
|
+
# as a backup
|
16
17
|
# (default: http://127.0.0.1:8983/solr/blacklight-core)
|
17
18
|
#
|
18
19
|
namespace :arclight do
|
19
20
|
desc 'Index an EAD document, use FILE=<path/to/ead.xml> and REPOSITORY_ID=<myid>'
|
20
21
|
task :index do
|
21
22
|
raise 'Please specify your EAD document, ex. FILE=<path/to/ead.xml>' unless ENV['FILE']
|
23
|
+
|
22
24
|
print "Loading #{ENV['FILE']} into index...\n"
|
23
|
-
|
24
|
-
|
25
|
-
|
25
|
+
solr_url = begin
|
26
|
+
Blacklight.default_index.connection.base_uri
|
27
|
+
rescue StandardError
|
28
|
+
ENV['SOLR_URL'] || 'http://127.0.0.1:8983/solr/blacklight-core'
|
29
|
+
end
|
30
|
+
elapsed_time = Benchmark.realtime {
|
31
|
+
`bundle exec traject -u #{solr_url} -i xml -c #{Arclight::Engine.root}/lib/arclight/traject/ead2_config.rb #{ENV['FILE']}`
|
26
32
|
}
|
27
33
|
print "Indexed #{ENV['FILE']} (in #{elapsed_time.round(3)} secs).\n"
|
28
34
|
end
|
@@ -30,6 +36,7 @@ namespace :arclight do
|
|
30
36
|
desc 'Index a directory of EADs, use DIR=<path/to/directory> and REPOSITORY_ID=<myid>'
|
31
37
|
task :index_dir do
|
32
38
|
raise 'Please specify your directory, ex. DIR=<path/to/directory>' unless ENV['DIR']
|
39
|
+
|
33
40
|
Dir.glob(File.join(ENV['DIR'], '*.xml')).each do |file|
|
34
41
|
system("rake arclight:index FILE=#{file}")
|
35
42
|
end
|
@@ -38,6 +45,7 @@ namespace :arclight do
|
|
38
45
|
desc 'Index an EAD document, use URL=<http[s]://domain/path/to/ead.xml> and REPOSITORY_ID=<myid>'
|
39
46
|
task :index_url do
|
40
47
|
raise 'Please specify your EAD document, ex. URL=<http[s]://domain/path/to/ead.xml>' unless ENV['URL']
|
48
|
+
|
41
49
|
ead = Nokogiri::XML(open(ENV['URL']))
|
42
50
|
tmp = Tempfile.new(["#{Time.now.to_i}-", '.xml'], encoding: 'utf-8')
|
43
51
|
begin
|
@@ -55,9 +63,11 @@ namespace :arclight do
|
|
55
63
|
desc 'Index EADs from a file of URLs, use BATCH=<path/to/urls.txt> and REPOSITORY_ID=<myid>'
|
56
64
|
task :index_url_batch do
|
57
65
|
raise 'Please specify your URLs file, ex. BATCH=<path/to/urls.txt>' unless ENV['BATCH']
|
66
|
+
|
58
67
|
File.open(ENV['BATCH']).each_line do |l|
|
59
68
|
ENV['URL'] = l.chomp
|
60
69
|
next if ENV['URL'].empty?
|
70
|
+
|
61
71
|
unless ENV['URL'] =~ /\A#{URI.regexp(%w[http https])}\z/
|
62
72
|
puts "Skipping invalid looking url #{ENV['URL']}"
|
63
73
|
next
|
@@ -69,21 +79,9 @@ namespace :arclight do
|
|
69
79
|
end
|
70
80
|
|
71
81
|
desc 'Destroy all documents in the index'
|
72
|
-
task :
|
82
|
+
task destroy_index_docs: :environment do
|
73
83
|
puts 'Deleting all documents from index...'
|
74
|
-
|
75
|
-
|
84
|
+
Blacklight.default_index.connection.delete_by_query('*:*')
|
85
|
+
Blacklight.default_index.connection.commit
|
76
86
|
end
|
77
87
|
end
|
78
|
-
|
79
|
-
def load_indexer
|
80
|
-
# hardcoded since we don't have access to Blacklight.connection_config[:url] here
|
81
|
-
ENV['SOLR_URL'] ||= 'http://127.0.0.1:8983/solr/blacklight-core'
|
82
|
-
|
83
|
-
options = {
|
84
|
-
document: Arclight::CustomDocument,
|
85
|
-
component: Arclight::CustomComponent
|
86
|
-
}
|
87
|
-
|
88
|
-
Arclight::Indexer.new(options)
|
89
|
-
end
|
data/package.json
CHANGED
@@ -2,13 +2,20 @@
|
|
2
2
|
"name": "arclight",
|
3
3
|
"description": "",
|
4
4
|
"main": "index.js",
|
5
|
-
"dependencies": {
|
5
|
+
"dependencies": {
|
6
|
+
"@babel/core": "^7.5.5",
|
7
|
+
"@babel/plugin-external-helpers": "^7.2.0",
|
8
|
+
"@babel/plugin-transform-modules-umd": "^7.2.0",
|
9
|
+
"@babel/preset-env": "^7.5.5"
|
10
|
+
},
|
6
11
|
"devDependencies": {
|
7
12
|
"eslint": "^3.19.0",
|
8
13
|
"eslint-config-airbnb-base": "^11.1.3",
|
9
14
|
"eslint-plugin-import": "^2.2.0"
|
10
15
|
},
|
11
16
|
"scripts": {
|
17
|
+
"lint": "eslint './app/assets/javascripts/**/*.{js,es6}'",
|
18
|
+
"lint:fix": "eslint --fix './app/assets/javascripts/**/*.{js,es6}'",
|
12
19
|
"test": "echo \"Error: no test specified\" && exit 1"
|
13
20
|
},
|
14
21
|
"repository": {
|
data/solr/conf/schema.xml
CHANGED
@@ -67,7 +67,7 @@
|
|
67
67
|
-->
|
68
68
|
|
69
69
|
<!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
|
70
|
-
<fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
|
70
|
+
<fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true" docValues="true"/>
|
71
71
|
|
72
72
|
<!-- boolean type: "true" or "false" -->
|
73
73
|
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
|
@@ -89,29 +89,14 @@
|
|
89
89
|
then default lucene sorting will be used which places docs without the
|
90
90
|
field first in an ascending sort and last in a descending sort.
|
91
91
|
-->
|
92
|
-
|
93
|
-
<!--
|
94
|
-
Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
|
95
|
-
-->
|
96
|
-
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
97
|
-
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
98
|
-
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
99
|
-
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
100
|
-
|
101
92
|
<!--
|
102
|
-
|
103
|
-
|
104
|
-
endpoints is large. See the javadoc for NumericRangeQuery for internal
|
105
|
-
implementation details.
|
106
|
-
|
107
|
-
Smaller precisionStep values (specified in bits) will lead to more tokens
|
108
|
-
indexed per value, slightly larger index size, and faster range queries.
|
109
|
-
A precisionStep of 0 disables indexing at different precision levels.
|
93
|
+
Numeric field types that index values using KD-trees.
|
94
|
+
Point fields don't support FieldCache, so they must have docValues="true" if needed for sorting, faceting, functions, etc.
|
110
95
|
-->
|
111
|
-
<fieldType name="
|
112
|
-
<fieldType name="
|
113
|
-
<fieldType name="
|
114
|
-
<fieldType name="
|
96
|
+
<fieldType name="pint" class="solr.IntPointField" docValues="true"/>
|
97
|
+
<fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/>
|
98
|
+
<fieldType name="plong" class="solr.LongPointField" docValues="true"/>
|
99
|
+
<fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/>
|
115
100
|
|
116
101
|
<!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
|
117
102
|
is a more restricted form of the canonical representation of dateTime
|
@@ -135,10 +120,8 @@
|
|
135
120
|
|
136
121
|
Note: For faster range queries, consider the tdate type
|
137
122
|
-->
|
138
|
-
|
139
|
-
|
140
|
-
<!-- A Trie based date field for faster date range queries and date faceting. -->
|
141
|
-
<fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>
|
123
|
+
<!-- KD-tree versions of date fields -->
|
124
|
+
<fieldType name="pdate" class="solr.DatePointField" docValues="true"/>
|
142
125
|
|
143
126
|
<!-- The "RandomSortField" is not used to store or search any
|
144
127
|
data. You can declare fields of this type it in your schema
|
@@ -153,154 +136,61 @@
|
|
153
136
|
-->
|
154
137
|
<fieldType name="random" class="solr.RandomSortField" indexed="true" />
|
155
138
|
|
156
|
-
|
157
|
-
specified as a tokenizer and a list of token filters. Different
|
158
|
-
analyzers may be specified for indexing and querying.
|
159
|
-
|
160
|
-
The optional positionIncrementGap puts space between multiple fields of
|
161
|
-
this type on the same document, with the purpose of preventing false phrase
|
162
|
-
matching across fields.
|
163
|
-
|
164
|
-
For more info on customizing your analyzer chain, please see
|
165
|
-
http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
|
166
|
-
-->
|
167
|
-
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
|
168
|
-
<analyzer>
|
169
|
-
<tokenizer class="solr.StandardTokenizerFactory"/>
|
170
|
-
<filter class="solr.ICUFoldingFilterFactory" />
|
171
|
-
<filter class="solr.SnowballPorterFilterFactory" language="English" />
|
172
|
-
</analyzer>
|
173
|
-
</fieldType>
|
174
|
-
|
175
|
-
<!-- One can also specify an existing Analyzer class that has a
|
176
|
-
default constructor via the class attribute on the analyzer element
|
177
|
-
<fieldType name="text_greek" class="solr.TextField">
|
178
|
-
<analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
|
179
|
-
</fieldType>
|
180
|
-
-->
|
181
|
-
|
182
|
-
<!-- A text field that only splits on whitespace for exact matching of words -->
|
183
|
-
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
|
139
|
+
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
|
184
140
|
<analyzer>
|
185
|
-
<tokenizer class="solr.
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
cross-language defaults: it tokenizes with StandardTokenizer,
|
191
|
-
removes stop words from case-insensitive "stopwords.txt"
|
192
|
-
(empty by default), and down cases. At query time only, it
|
193
|
-
also applies synonyms. -->
|
194
|
-
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
|
195
|
-
<analyzer type="index">
|
196
|
-
<tokenizer class="solr.StandardTokenizerFactory"/>
|
197
|
-
<!-- in this example, we will only use synonyms at query time
|
198
|
-
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
199
|
-
-->
|
200
|
-
<filter class="solr.LowerCaseFilterFactory"/>
|
201
|
-
</analyzer>
|
202
|
-
<analyzer type="query">
|
203
|
-
<tokenizer class="solr.StandardTokenizerFactory"/>
|
204
|
-
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
205
|
-
<filter class="solr.LowerCaseFilterFactory"/>
|
141
|
+
<tokenizer class="solr.ICUTokenizerFactory" />
|
142
|
+
<filter class="solr.KeywordRepeatFilterFactory" />
|
143
|
+
<filter class="solr.ICUFoldingFilterFactory" />
|
144
|
+
<filter class="solr.PorterStemFilterFactory"/>
|
145
|
+
<filter class="solr.RemoveDuplicatesTokenFilterFactory" />
|
206
146
|
</analyzer>
|
207
147
|
</fieldType>
|
208
148
|
|
209
|
-
<!-- A text field with defaults appropriate for English: it
|
210
|
-
tokenizes with StandardTokenizer, removes English stop words
|
211
|
-
(stopwords_en.txt), down cases, protects words from protwords.txt, and
|
212
|
-
finally applies Porter's stemming. The query time analyzer
|
213
|
-
also applies synonyms from synonyms.txt. -->
|
214
149
|
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
|
215
150
|
<analyzer type="index">
|
216
|
-
<tokenizer class="solr.
|
217
|
-
|
218
|
-
<filter class="solr.
|
219
|
-
|
220
|
-
<filter class="solr.
|
221
|
-
<filter class="solr.EnglishPossessiveFilterFactory"/>
|
151
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory" />
|
152
|
+
<filter class="solr.KeywordRepeatFilterFactory" />
|
153
|
+
<filter class="solr.WordDelimiterGraphFilterFactory"/>
|
154
|
+
<filter class="solr.EnglishPossessiveFilterFactory"/>
|
155
|
+
<filter class="solr.ICUFoldingFilterFactory" />
|
222
156
|
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
223
|
-
<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
|
224
|
-
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
225
|
-
-->
|
226
157
|
<filter class="solr.PorterStemFilterFactory"/>
|
158
|
+
<filter class="solr.FlattenGraphFilterFactory"/> <!-- required on index analyzers after graph filters -->
|
159
|
+
<filter class="solr.RemoveDuplicatesTokenFilterFactory" />
|
227
160
|
</analyzer>
|
228
161
|
<analyzer type="query">
|
229
|
-
<tokenizer class="solr.
|
162
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory" />
|
163
|
+
<filter class="solr.KeywordRepeatFilterFactory" />
|
164
|
+
<filter class="solr.WordDelimiterGraphFilterFactory"/>
|
230
165
|
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
231
|
-
<filter class="solr.
|
232
|
-
|
166
|
+
<filter class="solr.ICUFoldingFilterFactory" />
|
167
|
+
<filter class="solr.EnglishPossessiveFilterFactory"/>
|
233
168
|
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
234
|
-
<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
|
235
|
-
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
236
|
-
-->
|
237
169
|
<filter class="solr.PorterStemFilterFactory"/>
|
170
|
+
<filter class="solr.RemoveDuplicatesTokenFilterFactory" />
|
238
171
|
</analyzer>
|
239
172
|
</fieldType>
|
240
173
|
|
241
|
-
|
242
|
-
aggressive word-splitting and autophrase features enabled.
|
243
|
-
This field is just like text_en, except it adds
|
244
|
-
WordDelimiterFilter to enable splitting and matching of
|
245
|
-
words on case-change, alpha numeric boundaries, and
|
246
|
-
non-alphanumeric chars. This means certain compound word
|
247
|
-
cases will work, for example query "wi fi" will match
|
248
|
-
document "WiFi" or "wi-fi". However, other cases will still
|
249
|
-
not match, for example if the query is "wifi" and the
|
250
|
-
document is "wi fi" or if the query is "wi-fi" and the
|
251
|
-
document is "wifi".
|
252
|
-
-->
|
253
|
-
<fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
174
|
+
<fieldType name="identifier_match" class="solr.TextField" positionIncrementGap="100">
|
254
175
|
<analyzer type="index">
|
255
176
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
<filter class="solr.
|
262
|
-
<filter class="solr.
|
177
|
+
<filter class="solr.WordDelimiterGraphFilterFactory"
|
178
|
+
catenateWords="1"
|
179
|
+
catenateNumbers="1"
|
180
|
+
catenateAll="1"
|
181
|
+
/>
|
182
|
+
<filter class="solr.ICUFoldingFilterFactory" />
|
183
|
+
<filter class="solr.FlattenGraphFilterFactory"/> <!-- required on index analyzers after graph filters -->
|
263
184
|
</analyzer>
|
264
185
|
<analyzer type="query">
|
265
186
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
266
|
-
<filter class="solr.
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
<!-- Less flexible matching, but less false matches. Probably not ideal for product names,
|
275
|
-
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
276
|
-
<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
277
|
-
<analyzer>
|
278
|
-
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
279
|
-
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
280
|
-
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
|
281
|
-
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
282
|
-
<filter class="solr.LowerCaseFilterFactory"/>
|
283
|
-
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
284
|
-
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
285
|
-
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
286
|
-
possible with WordDelimiterFilter in conjuncton with stemming. -->
|
287
|
-
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
288
|
-
</analyzer>
|
289
|
-
</fieldType>
|
290
|
-
|
291
|
-
<!-- Just like text_general except it reverses the characters of
|
292
|
-
each token, to enable more efficient leading wildcard queries. -->
|
293
|
-
<fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
|
294
|
-
<analyzer type="index">
|
295
|
-
<tokenizer class="solr.StandardTokenizerFactory"/>
|
296
|
-
<filter class="solr.LowerCaseFilterFactory"/>
|
297
|
-
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
|
298
|
-
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
|
299
|
-
</analyzer>
|
300
|
-
<analyzer type="query">
|
301
|
-
<tokenizer class="solr.StandardTokenizerFactory"/>
|
302
|
-
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
303
|
-
<filter class="solr.LowerCaseFilterFactory"/>
|
187
|
+
<filter class="solr.WordDelimiterGraphFilterFactory"
|
188
|
+
catenateWords="1"
|
189
|
+
catenateNumbers="1"
|
190
|
+
catenateAll="1"
|
191
|
+
/>
|
192
|
+
<filter class="solr.ICUFoldingFilterFactory" />
|
193
|
+
<filter class="solr.WordDelimiterGraphFilterFactory"/>
|
304
194
|
</analyzer>
|
305
195
|
</fieldType>
|
306
196
|
|
@@ -313,7 +203,7 @@
|
|
313
203
|
</analyzer>
|
314
204
|
</fieldType>
|
315
205
|
|
316
|
-
<fieldType class="solr.TextField"
|
206
|
+
<fieldType name="textSuggest" class="solr.TextField" positionIncrementGap="100">
|
317
207
|
<analyzer>
|
318
208
|
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
319
209
|
<filter class="solr.LowerCaseFilterFactory"/>
|
@@ -321,16 +211,6 @@
|
|
321
211
|
</analyzer>
|
322
212
|
</fieldType>
|
323
213
|
|
324
|
-
<!-- charFilter + WhitespaceTokenizer -->
|
325
|
-
<!--
|
326
|
-
<fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" >
|
327
|
-
<analyzer>
|
328
|
-
<charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
|
329
|
-
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
330
|
-
</analyzer>
|
331
|
-
</fieldType>
|
332
|
-
-->
|
333
|
-
|
334
214
|
<!-- This is an example of using the KeywordTokenizer along
|
335
215
|
With various TokenFilterFactories to produce a sortable field
|
336
216
|
that does not include some properties of the source text
|
@@ -364,71 +244,9 @@
|
|
364
244
|
</analyzer>
|
365
245
|
</fieldType>
|
366
246
|
|
367
|
-
<fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
|
368
|
-
<analyzer>
|
369
|
-
<tokenizer class="solr.StandardTokenizerFactory"/>
|
370
|
-
<filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
|
371
|
-
</analyzer>
|
372
|
-
</fieldtype>
|
373
|
-
|
374
|
-
<fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" >
|
375
|
-
<analyzer>
|
376
|
-
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
377
|
-
<!--
|
378
|
-
The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
|
379
|
-
a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
|
380
|
-
Attributes of the DelimitedPayloadTokenFilterFactory :
|
381
|
-
"delimiter" - a one character delimiter. Default is | (pipe)
|
382
|
-
"encoder" - how to encode the following value into a playload
|
383
|
-
float -> org.apache.lucene.analysis.payloads.FloatEncoder,
|
384
|
-
integer -> o.a.l.a.p.IntegerEncoder
|
385
|
-
identity -> o.a.l.a.p.IdentityEncoder
|
386
|
-
Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
|
387
|
-
-->
|
388
|
-
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
|
389
|
-
</analyzer>
|
390
|
-
</fieldtype>
|
391
|
-
|
392
|
-
<!-- lowercases the entire field value, keeping it as a single token. -->
|
393
|
-
<fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
|
394
|
-
<analyzer>
|
395
|
-
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
396
|
-
<filter class="solr.LowerCaseFilterFactory" />
|
397
|
-
</analyzer>
|
398
|
-
</fieldType>
|
399
|
-
|
400
|
-
<fieldType name="text_path" class="solr.TextField" positionIncrementGap="100">
|
401
|
-
<analyzer>
|
402
|
-
<tokenizer class="solr.PathHierarchyTokenizerFactory"/>
|
403
|
-
</analyzer>
|
404
|
-
</fieldType>
|
405
|
-
|
406
|
-
<!-- since fields of this type are by default not stored or indexed,
|
407
|
-
any data added to them will be ignored outright. -->
|
408
|
-
<fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
|
409
|
-
|
410
|
-
<!-- This point type indexes the coordinates as separate fields (subFields)
|
411
|
-
If subFieldType is defined, it references a type, and a dynamic field
|
412
|
-
definition is created matching *___<typename>. Alternately, if
|
413
|
-
subFieldSuffix is defined, that is used to create the subFields.
|
414
|
-
Example: if subFieldType="double", then the coordinates would be
|
415
|
-
indexed in fields myloc_0___double,myloc_1___double.
|
416
|
-
Example: if subFieldSuffix="_d" then the coordinates would be indexed
|
417
|
-
in fields myloc_0_d,myloc_1_d
|
418
|
-
The subFields are an implementation detail of the fieldType, and end
|
419
|
-
users normally should not need to know about them.
|
420
|
-
-->
|
421
|
-
<fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
|
422
|
-
|
423
247
|
<!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
|
424
248
|
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
|
425
249
|
|
426
|
-
<!--
|
427
|
-
A Geohash is a compact representation of a latitude longitude pair in a single field.
|
428
|
-
See http://wiki.apache.org/solr/SpatialSearch
|
429
|
-
-->
|
430
|
-
<fieldtype name="geohash" class="solr.GeoHashField"/>
|
431
|
-
|
432
250
|
<fieldType name="_nest_path_" class="solr.NestPathField" />
|
433
251
|
|
434
252
|
</types>
|
@@ -460,11 +278,12 @@
|
|
460
278
|
|
461
279
|
<!-- NOTE: this is not a full list of fields in the index; dynamic fields are also used -->
|
462
280
|
<field name="id" type="string" indexed="true" stored="true" required="true" />
|
463
|
-
<field name="_version_" type="
|
464
|
-
<field name="timestamp" type="
|
281
|
+
<field name="_version_" type="plong" indexed="true" stored="true" multiValued="false" />
|
282
|
+
<field name="timestamp" type="pdate" indexed="true" stored="true" default="NOW" multiValued="false"/>
|
465
283
|
<!-- default, catch all search field -->
|
466
284
|
<field name="text" type="text" indexed="true" stored="true" multiValued="true"/>
|
467
|
-
|
285
|
+
<field name="unitid_identifier_match" type="identifier_match" indexed="true" multiValued="true" />
|
286
|
+
|
468
287
|
<field name="_root_" type="string" indexed="true" stored="true" docValues="false" />
|
469
288
|
<field name="_nest_parent_" type="string" indexed="true" stored="true"/>
|
470
289
|
<field name="_nest_path_" type="_nest_path_" indexed="true" stored="true"/>
|
@@ -476,37 +295,9 @@
|
|
476
295
|
EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
|
477
296
|
Longer patterns will be matched first. if equal size patterns
|
478
297
|
both match, the first appearing in the schema will be used. -->
|
479
|
-
<dynamicField name="*_i" type="int" indexed="true" stored="true"/>
|
480
|
-
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
|
481
|
-
<dynamicField name="*_l" type="long" indexed="true" stored="true"/>
|
482
|
-
<dynamicField name="*_t" type="text" indexed="true" stored="true" multiValued="true"/>
|
483
|
-
<dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/>
|
484
|
-
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
|
485
|
-
<dynamicField name="*_f" type="float" indexed="true" stored="true"/>
|
486
|
-
<dynamicField name="*_d" type="double" indexed="true" stored="true"/>
|
487
|
-
|
488
|
-
<!-- Type used to index the lat and lon components for the "location" FieldType -->
|
489
|
-
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/>
|
490
|
-
|
491
|
-
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
|
492
|
-
<dynamicField name="*_p" type="location" indexed="true" stored="true"/>
|
493
|
-
|
494
|
-
<!-- some trie-coded dynamic fields for faster range queries -->
|
495
|
-
<dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
|
496
|
-
<dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
|
497
|
-
<dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
|
498
|
-
<dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
|
499
|
-
<dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
|
500
|
-
|
501
|
-
<dynamicField name="ignored_*" type="ignored" multiValued="true"/>
|
502
|
-
<dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
|
503
|
-
|
504
298
|
<dynamicField name="random_*" type="random" />
|
505
299
|
|
506
|
-
<dynamicField name="*_display" type="string" indexed="false" stored="true" multiValued="true" />
|
507
|
-
<dynamicField name="*_facet" type="string" indexed="true" stored="false" multiValued="true" />
|
508
300
|
<dynamicField name="*_sort" type="alphaNumericSort" indexed="true" stored="false" multiValued="false" />
|
509
|
-
<dynamicField name="*_unstem_search" type="text_general" indexed="true" stored="false" multiValued="true" />
|
510
301
|
<dynamicField name="*spell" type="textSpell" indexed="true" stored="false" multiValued="true" />
|
511
302
|
<dynamicField name="*suggest" type="textSuggest" indexed="true" stored="false" multiValued="true" />
|
512
303
|
|
@@ -523,12 +314,8 @@
|
|
523
314
|
<dynamicField name="*_ssm" type="string" stored="true" indexed="false" multiValued="true" />
|
524
315
|
<dynamicField name="*_ssi" type="string" stored="true" indexed="true" multiValued="false" />
|
525
316
|
<dynamicField name="*_ssim" type="string" stored="true" indexed="true" multiValued="true" />
|
526
|
-
<dynamicField name="*
|
527
|
-
<dynamicField name="*
|
528
|
-
<dynamicField name="*_bsi" type="boolean" stored="true" indexed="true" multiValued="false" />
|
529
|
-
<dynamicField name="*_isim" type="int" stored="true" indexed="true" multiValued="true" />
|
530
|
-
<dynamicField name="*_ii" type="int" stored="false" indexed="true" multiValued="false" />
|
531
|
-
|
317
|
+
<dynamicField name="*_isim" type="pint" stored="true" indexed="true" multiValued="true" />
|
318
|
+
<dynamicField name="*_ii" type="pint" stored="false" indexed="true" multiValued="false" />
|
532
319
|
</fields>
|
533
320
|
|
534
321
|
<!-- Field to use to determine and enforce document uniqueness.
|
@@ -581,9 +368,7 @@
|
|
581
368
|
<copyField source="userestrict_teim" dest="text" />
|
582
369
|
<!-- grab structured data that's important -->
|
583
370
|
<copyField source="unitid_ssm" dest="text" />
|
584
|
-
|
585
|
-
<!-- unstemmed fields -->
|
586
|
-
<!-- <copyField source="title_t" dest="title_unstem_search"/> -->
|
371
|
+
<copyField source="unitid_ssm" dest="unitid_identifier_match" />
|
587
372
|
|
588
373
|
<!-- sort fields -->
|
589
374
|
<copyField source="normalized_title_ssm" dest="title_sort"/> <!-- TODO: assumes single values -->
|
@@ -602,30 +387,4 @@
|
|
602
387
|
<copyField source="places_ssim" dest="suggest"/>
|
603
388
|
<copyField source="access_subjects_ssim" dest="suggest"/>
|
604
389
|
|
605
|
-
<!-- Above, multiple source fields are copied to the [text] field.
|
606
|
-
Another way to map multiple source fields to the same
|
607
|
-
destination field is to use the dynamic field syntax.
|
608
|
-
copyField also supports a maxChars to copy setting. -->
|
609
|
-
|
610
|
-
<!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
|
611
|
-
|
612
|
-
<!-- copy name to alphaNameSort, a field designed for sorting by name -->
|
613
|
-
<!-- <copyField source="name" dest="alphaNameSort"/> -->
|
614
|
-
|
615
|
-
|
616
|
-
<!-- Similarity is the scoring routine for each document vs. a query.
|
617
|
-
A custom similarity may be specified here, but the default is fine
|
618
|
-
for most applications. -->
|
619
|
-
<!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
|
620
|
-
<!-- ... OR ...
|
621
|
-
Specify a SimilarityFactory class name implementation
|
622
|
-
allowing parameters to be used.
|
623
|
-
-->
|
624
|
-
<!--
|
625
|
-
<similarity class="com.example.solr.CustomSimilarityFactory">
|
626
|
-
<str name="paramkey">param value</str>
|
627
|
-
</similarity>
|
628
|
-
-->
|
629
|
-
|
630
|
-
|
631
390
|
</schema>
|