solr_ead 0.6.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/README.md +37 -0
- data/lib/solr_ead/behaviors.rb +4 -11
- data/lib/solr_ead/component.rb +1 -0
- data/lib/solr_ead/document.rb +1 -0
- data/lib/solr_ead/formatting.rb +56 -0
- data/lib/solr_ead/version.rb +1 -1
- data/lib/solr_ead.rb +1 -1
- data/spec/component_spec.rb +18 -5
- data/spec/fixtures/html_component.xml +20 -0
- data/spec/formatting_spec.rb +26 -0
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
NWJmOTVjOGQ3OWMwODIwNjE4NWYyYjA3ZjU4YmM5Y2IwMzI1YzQ0YQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MDA5ZjRiMGI3OTBlYzYzZTM1NDdiYmUwNWRhZmIyYzg0YjEwMDRiZQ==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ZDcxMjhkZjNiYTgwOTZjMDRkZGQyMzgwOTdmYTkxZDE1YTkxOTI2YzQ3ODlm
|
10
|
+
ODk1MmJlZWY5MjQ1ZjM1ZWY3ODQ1MThiMDUzNTEyYjIzMjRjMDE0NTM3NTY3
|
11
|
+
ODIzMjU1NmFmZWRiZDBjYzk2MmRmYjcwMjA1MDEzM2U1YThmMDk=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZmI5N2NhY2U3MzY3MWUwNDkyNmFjOGJhNGMzNTBhYmE4ODhhYTZmYWQzOGMw
|
14
|
+
ZGVmYmVlYjQyNmQ2MDUwZDkwYThjYjFkYjBlNjYxNDNiZjY4OTU3YzVmMzY3
|
15
|
+
NDc3MmU5ZGE3NGE2MzI4ODkxMGZiYjdlMjVkZDJjZmU1NmI2OWU=
|
data/README.md
CHANGED
@@ -73,6 +73,21 @@ will be able to apply any xslt processing you wish. Other solutions are possibl
|
|
73
73
|
xml from the document as well as the component, depending on the needs of your
|
74
74
|
application.
|
75
75
|
|
76
|
+
### EAD Formatting
|
77
|
+
|
78
|
+
EAD xml may contain formatted text such as:
|
79
|
+
|
80
|
+
<title render="italic">this is italicized</title>
|
81
|
+
|
82
|
+
When OM processes any node that contains formatted text, the formatting markup will be ignored
|
83
|
+
and the text will appear without any of the `<title>` tags denoting format. If you wish
|
84
|
+
to have the formatting preserved as converted HTML, you may add the formatted string
|
85
|
+
to your solr document:
|
86
|
+
|
87
|
+
Solrizer.set_field(solr_doc, "title", self.term_to_html("title"), :displayable)
|
88
|
+
|
89
|
+
See the section on customization for more information.
|
90
|
+
|
76
91
|
## Customization
|
77
92
|
|
78
93
|
Chances are the default definitions are not sufficient for your needs. If you want to
|
@@ -202,6 +217,28 @@ solr. In order to have these fields index correctly, include the following in y
|
|
202
217
|
Note that the type "text_en" is dependent on your particular solr application, but the others should be
|
203
218
|
included in the default installation.
|
204
219
|
|
220
|
+
### Displaying HTML
|
221
|
+
|
222
|
+
To convert formatted EAD nodes to HTML, override the term's contents in the `to_solr` method:
|
223
|
+
|
224
|
+
class CustomDocument < SolrEad::Document
|
225
|
+
|
226
|
+
use_terminology SolrEad::Document
|
227
|
+
|
228
|
+
def to_solr(solr_doc = Hash.new)
|
229
|
+
super(solr_doc)
|
230
|
+
Solrizer.set_field(solr_doc, "title", self.term_to_html("title"), :displayable)
|
231
|
+
end
|
232
|
+
|
233
|
+
end
|
234
|
+
|
235
|
+
The above example takes the title term as it is defined in `SolrEad::Document` and changes the contents
|
236
|
+
of its solr display field. In this case, the contents of the xml node for the "title" OM term are
|
237
|
+
processed by the `term_to_html` method which converts the ead xml to html and stores it in the solr
|
238
|
+
field given by the `set_field` method.
|
239
|
+
|
240
|
+
The details of conversion from ead xml to html are specified in `SolrEad::Formatting`.
|
241
|
+
|
205
242
|
## Issues
|
206
243
|
|
207
244
|
### eadid format
|
data/lib/solr_ead/behaviors.rb
CHANGED
@@ -2,6 +2,8 @@ require "sanitize"
|
|
2
2
|
|
3
3
|
module SolrEad::Behaviors
|
4
4
|
|
5
|
+
include SolrEad::Formatting
|
6
|
+
|
5
7
|
# Takes a file as its input and returns a Nokogiri::XML::NodeSet of component <c> nodes
|
6
8
|
#
|
7
9
|
# It'll make an attempt at substituting numbered component levels for non-numbered
|
@@ -102,23 +104,14 @@ module SolrEad::Behaviors
|
|
102
104
|
title = xml.at("/c/did/unittitle")
|
103
105
|
date = xml.at("/c/did/unitdate")
|
104
106
|
if !title.nil? and !title.content.empty?
|
105
|
-
return
|
107
|
+
return ead_to_html(title.content)
|
106
108
|
elsif !date.nil? and !date.content.empty?
|
107
|
-
return
|
109
|
+
return ead_to_html(date.content)
|
108
110
|
else
|
109
111
|
return "[No title available]"
|
110
112
|
end
|
111
113
|
end
|
112
114
|
|
113
|
-
# Converts formatting elements in the ead into html tags
|
114
|
-
def ead_clean_xml(string)
|
115
|
-
string.gsub!(/<title/,"<span")
|
116
|
-
string.gsub!(/<\/title/,"</span")
|
117
|
-
string.gsub!(/render=/,"class=")
|
118
|
-
sanitize = Sanitize.clean(string, :elements => ['span'], :attributes => {'span' => ['class']})
|
119
|
-
sanitize.gsub("\n",'').gsub(/\s+/, ' ').strip
|
120
|
-
end
|
121
|
-
|
122
115
|
# Returns true or false for a component with attached <c> child nodes.
|
123
116
|
def component_children?(node, t = Array.new)
|
124
117
|
node.children.each { |n| t << n.name }
|
data/lib/solr_ead/component.rb
CHANGED
data/lib/solr_ead/document.rb
CHANGED
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'sanitize'
|
2
|
+
|
3
|
+
module SolrEad::Formatting
|
4
|
+
|
5
|
+
RENDER_ATTRS =
|
6
|
+
{
|
7
|
+
"altrender" => "em",
|
8
|
+
"bold" => "strong",
|
9
|
+
"doublequote" => "em",
|
10
|
+
"bolddoublequote" => "strong",
|
11
|
+
"bolditalic" => "strong",
|
12
|
+
"boldsinglequote" => "strong",
|
13
|
+
"boldsmcaps" => "strong",
|
14
|
+
"boldunderline" => "strong",
|
15
|
+
"italic" => "em",
|
16
|
+
"italics" => "em",
|
17
|
+
"nonproport" => "em",
|
18
|
+
"singlequote" => "em",
|
19
|
+
"smcaps" => "em",
|
20
|
+
"sub" => "sub",
|
21
|
+
"super" => "sup",
|
22
|
+
"underline" => "em"
|
23
|
+
}
|
24
|
+
|
25
|
+
# If you're within the context of an OM::XML::Document, you can just pass the term you want converted and
|
26
|
+
# this will get the xml using the term.
|
27
|
+
def term_to_html term
|
28
|
+
ead_to_html self.send(term).nodeset.to_xml
|
29
|
+
end
|
30
|
+
|
31
|
+
# Use this method to convert the xml directly
|
32
|
+
def ead_to_html xml
|
33
|
+
::Sanitize.clean(transform_render_attributes(xml), :elements => RENDER_ATTRS.values.uniq )
|
34
|
+
end
|
35
|
+
|
36
|
+
private
|
37
|
+
|
38
|
+
def transform_render_attributes xml
|
39
|
+
::Sanitize.clean(xml, :transformers => transformer)
|
40
|
+
end
|
41
|
+
|
42
|
+
def transformer
|
43
|
+
lambda do |env|
|
44
|
+
convert_ead_tag_to_html(env[:node])
|
45
|
+
{:node_whitelist => [env[:node]]}
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
def convert_ead_tag_to_html node
|
50
|
+
if RENDER_ATTRS.keys.include? node["render"]
|
51
|
+
node.name = RENDER_ATTRS[node["render"]]
|
52
|
+
node.remove_attribute "render"
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
data/lib/solr_ead/version.rb
CHANGED
data/lib/solr_ead.rb
CHANGED
@@ -7,6 +7,7 @@ require 'active_support'
|
|
7
7
|
module SolrEad
|
8
8
|
extend ActiveSupport::Autoload
|
9
9
|
|
10
|
+
autoload :Formatting
|
10
11
|
autoload :Behaviors
|
11
12
|
autoload :OmBehaviors
|
12
13
|
autoload :Indexer
|
@@ -14,7 +15,6 @@ module SolrEad
|
|
14
15
|
autoload :Component
|
15
16
|
autoload :Railtie if defined?(Rails)
|
16
17
|
|
17
|
-
|
18
18
|
def self.version
|
19
19
|
SolrEad::VERSION
|
20
20
|
end
|
data/spec/component_spec.rb
CHANGED
@@ -2,13 +2,13 @@ require "spec_helper"
|
|
2
2
|
|
3
3
|
describe SolrEad::Component do
|
4
4
|
|
5
|
-
before(:all) do
|
6
|
-
file = "component_template.xml"
|
7
|
-
@doc = SolrEad::Component.from_xml(fixture file)
|
8
|
-
end
|
9
|
-
|
10
5
|
describe "the solr document" do
|
11
6
|
|
7
|
+
before :all do
|
8
|
+
file = "component_template.xml"
|
9
|
+
@doc = SolrEad::Component.from_xml(fixture file)
|
10
|
+
end
|
11
|
+
|
12
12
|
describe "for item-level components" do
|
13
13
|
|
14
14
|
before :each do
|
@@ -52,4 +52,17 @@ describe SolrEad::Component do
|
|
52
52
|
|
53
53
|
end
|
54
54
|
|
55
|
+
describe "formatting fields as html" do
|
56
|
+
|
57
|
+
before :all do
|
58
|
+
file = "html_component.xml"
|
59
|
+
@sample = SolrEad::Component.from_xml(fixture file)
|
60
|
+
end
|
61
|
+
|
62
|
+
it "should format as term as html" do
|
63
|
+
@sample.term_to_html("scopecontent").should include "<em>OPAL</em> "
|
64
|
+
end
|
65
|
+
|
66
|
+
end
|
67
|
+
|
55
68
|
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
<c id="ref202" level="series">
|
2
|
+
<did>
|
3
|
+
<unittitle>Series VIII: Miscellaneous</unittitle>
|
4
|
+
<unitdate>1960</unitdate>
|
5
|
+
</did>
|
6
|
+
<scopecontent id="ref215">
|
7
|
+
<head>Scope and Contents</head>
|
8
|
+
<p>
|
9
|
+
Series VIII: Miscellaneous, 1960, contains a single issue of the pocket magazine <title render="italic">OPAL</title>, from Pride Publications (Cleveland, Ohio). The staff of <title render="italic">OPAL</title> included Valena M. Williams, executive editor; John Bentley, art director; and Nathaniel Hubbard, circulation manager. Volume 1, issue 5 includes work by photographers James Gayle, Anderson Marlow, and Harvey Bowie; columnists Bill Clark, Harrison Dillard, Edward Jones, William Matlock, and Mary Zachary; and artwork by cartoonist Ted Walker. It is possible one of the photographs in the magazine could be by Baynes, but none are cited as such. The publication contains information from members of the local community on birthday celebrations and other upcoming events, contest winners, and fashion, as well as longer articles on jazz, the importance of education and peaceful protests, the influence of disc jockeys and popular music on teens, WJMO's middle school student disc jockeys, singer Nancy Wilson, and Isabelle Cooley, co-star of the feature film <title render="italic">I Passed for White</title>.
|
10
|
+
</p>
|
11
|
+
</scopecontent>
|
12
|
+
<c id="ref111" level="file">
|
13
|
+
<did>
|
14
|
+
<unittitle>Cleveland Opal, Volume 1, Issue 5</unittitle>
|
15
|
+
<container id="cid1324121" type="Box" label="Periodicals">2</container>
|
16
|
+
<container parent="cid1324121" type="Folder">40</container>
|
17
|
+
<unitdate>1960 May 18</unitdate>
|
18
|
+
</did>
|
19
|
+
</c>
|
20
|
+
</c>
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe SolrEad::Formatting do
|
4
|
+
|
5
|
+
before :all do
|
6
|
+
class SampleClass
|
7
|
+
include SolrEad::Formatting
|
8
|
+
end
|
9
|
+
@sample = SampleClass.new
|
10
|
+
end
|
11
|
+
|
12
|
+
describe "#ead_to_html" do
|
13
|
+
|
14
|
+
it "should convert ead markup to html" do
|
15
|
+
xml = 'This is some text with <title render="italics">italics</title> included in it.'
|
16
|
+
@sample.ead_to_html(xml).should == 'This is some text with <em>italics</em> included in it.'
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should remove other tags" do
|
20
|
+
xml = 'Blah blah <title render="italics">italics</title> blah <span>blah</span> <title render="bold">italics</title>'
|
21
|
+
@sample.ead_to_html(xml).should == 'Blah blah <em>italics</em> blah blah <strong>italics</strong>'
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
|
26
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: solr_ead
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adam Wead
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-11-
|
11
|
+
date: 2013-11-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: om
|
@@ -187,6 +187,7 @@ files:
|
|
187
187
|
- lib/solr_ead/behaviors.rb
|
188
188
|
- lib/solr_ead/component.rb
|
189
189
|
- lib/solr_ead/document.rb
|
190
|
+
- lib/solr_ead/formatting.rb
|
190
191
|
- lib/solr_ead/indexer.rb
|
191
192
|
- lib/solr_ead/om_behaviors.rb
|
192
193
|
- lib/solr_ead/railtie.rb
|
@@ -204,7 +205,9 @@ files:
|
|
204
205
|
- spec/fixtures/ead_messy_format.xml
|
205
206
|
- spec/fixtures/ead_sample.xml
|
206
207
|
- spec/fixtures/ead_template.xml
|
208
|
+
- spec/fixtures/html_component.xml
|
207
209
|
- spec/fixtures/pp002010.xml
|
210
|
+
- spec/formatting_spec.rb
|
208
211
|
- spec/indexer_spec.rb
|
209
212
|
- spec/spec_helper.rb
|
210
213
|
homepage: http://github.com/awead/solr_ead
|
@@ -241,7 +244,9 @@ test_files:
|
|
241
244
|
- spec/fixtures/ead_messy_format.xml
|
242
245
|
- spec/fixtures/ead_sample.xml
|
243
246
|
- spec/fixtures/ead_template.xml
|
247
|
+
- spec/fixtures/html_component.xml
|
244
248
|
- spec/fixtures/pp002010.xml
|
249
|
+
- spec/formatting_spec.rb
|
245
250
|
- spec/indexer_spec.rb
|
246
251
|
- spec/spec_helper.rb
|
247
252
|
has_rdoc:
|